OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
6 | 6 |
7 import org.chromium.distiller.DomUtil; | 7 import org.chromium.distiller.DomUtil; |
8 import org.chromium.distiller.DomWalker; | 8 import org.chromium.distiller.DomWalker; |
9 import org.chromium.distiller.LogUtil; | 9 import org.chromium.distiller.LogUtil; |
10 import org.chromium.distiller.TableClassifier; | 10 import org.chromium.distiller.TableClassifier; |
11 | 11 |
12 import com.google.gwt.dom.client.Element; | 12 import com.google.gwt.dom.client.Element; |
13 import com.google.gwt.dom.client.Node; | 13 import com.google.gwt.dom.client.Node; |
14 import com.google.gwt.dom.client.Style; | 14 import com.google.gwt.dom.client.Style; |
15 import com.google.gwt.dom.client.TableElement; | 15 import com.google.gwt.dom.client.TableElement; |
16 import com.google.gwt.dom.client.Text; | 16 import com.google.gwt.dom.client.Text; |
17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; | 17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; |
18 import org.chromium.distiller.extractors.embeds.ImageExtractor; | 18 import org.chromium.distiller.extractors.embeds.ImageExtractor; |
19 import org.chromium.distiller.extractors.embeds.TwitterExtractor; | 19 import org.chromium.distiller.extractors.embeds.TwitterExtractor; |
20 import org.chromium.distiller.extractors.embeds.VimeoExtractor; | 20 import org.chromium.distiller.extractors.embeds.VimeoExtractor; |
21 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; | 21 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; |
22 | |
23 import java.util.ArrayList; | 22 import java.util.ArrayList; |
wychen
2015/08/01 01:00:20
nitpick: why delete the empty line above?
| |
24 import java.util.HashSet; | 23 import java.util.HashSet; |
25 import java.util.List; | 24 import java.util.List; |
26 import java.util.Set; | 25 import java.util.Set; |
27 | 26 |
28 /** | 27 /** |
29 * This DomWalker.Visitor creates a WebDocument from the walked DOM. It skips hidden and other | 28 * This DomWalker.Visitor creates a WebDocument from the walked DOM. It skips hidden and other |
30 * elements that should not be in the created document. Some of these skipped elements (hidden | 29 * elements that should not be in the created document. Some of these skipped elements (hidden |
31 * elements and data tables) are available for retrieval after processing. | 30 * elements and data tables) are available for retrieval after processing. |
32 */ | 31 */ |
33 public class DomConverter implements DomWalker.Visitor { | 32 public class DomConverter implements DomWalker.Visitor { |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
93 for (EmbedExtractor extractor : extractors) { | 92 for (EmbedExtractor extractor : extractors) { |
94 WebElement embed = extractor.extract(e); | 93 WebElement embed = extractor.extract(e); |
95 if (embed != null) { | 94 if (embed != null) { |
96 builder.embed(embed); | 95 builder.embed(embed); |
97 return false; | 96 return false; |
98 } | 97 } |
99 } | 98 } |
100 } | 99 } |
101 | 100 |
102 switch (e.getTagName()) { | 101 switch (e.getTagName()) { |
102 case "OL": | |
103 case "UL": | |
104 case "LI": | |
105 builder.list(new PlaceHolder(e.getTagName(), PlaceHolder.TagType.START)); | |
106 break; | |
103 case "BR": | 107 case "BR": |
104 builder.lineBreak(e); | 108 builder.lineBreak(e); |
105 return false; | 109 return false; |
106 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder | 110 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder |
107 // later. | 111 // later. |
108 case "TABLE": | 112 case "TABLE": |
109 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); | 113 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); |
110 logTableInfo(e, type); | 114 logTableInfo(e, type); |
111 if (type == TableClassifier.Type.DATA) { | 115 if (type == TableClassifier.Type.DATA) { |
112 builder.dataTable(e); | 116 builder.dataTable(e); |
(...skipping 22 matching lines...) Expand all Loading... | |
135 case "LINK": | 139 case "LINK": |
136 case "NOSCRIPT": | 140 case "NOSCRIPT": |
137 return false; | 141 return false; |
138 } | 142 } |
139 builder.startElement(e); | 143 builder.startElement(e); |
140 return true; | 144 return true; |
141 } | 145 } |
142 | 146 |
143 @Override | 147 @Override |
144 public void exit(Node n) { | 148 public void exit(Node n) { |
149 if (n.getNodeType() == Node.ELEMENT_NODE) { | |
150 Element e = Element.as(n); | |
151 switch (e.getTagName()) { | |
152 case "OL": | |
153 case "UL": | |
154 case "LI": | |
155 builder.list(new PlaceHolder(e.getTagName(), PlaceHolder.TagType.END)); | |
156 break; | |
157 } | |
158 } | |
145 builder.endElement(); | 159 builder.endElement(); |
146 } | 160 } |
147 | 161 |
148 private static void logVisibilityInfo(Element e, boolean visible) { | 162 private static void logVisibilityInfo(Element e, boolean visible) { |
149 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; | 163 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; |
150 Style style = DomUtil.getComputedStyle(e); | 164 Style style = DomUtil.getComputedStyle(e); |
151 LogUtil.logToConsole((visible ? "KEEP " : "SKIP ") + e.getTagName() + | 165 LogUtil.logToConsole((visible ? "KEEP " : "SKIP ") + e.getTagName() + |
152 ": id=" + e.getId() + | 166 ": id=" + e.getId() + |
153 ", dsp=" + style.getDisplay() + | 167 ", dsp=" + style.getDisplay() + |
154 ", vis=" + style.getVisibility() + | 168 ", vis=" + style.getVisibility() + |
155 ", opaq=" + style.getOpacity()); | 169 ", opaq=" + style.getOpacity()); |
156 } | 170 } |
157 | 171 |
158 private static void logTableInfo(Element e, TableClassifier.Type type) { | 172 private static void logTableInfo(Element e, TableClassifier.Type type) { |
159 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; | 173 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; |
160 Element parent = e.getParentElement(); | 174 Element parent = e.getParentElement(); |
161 LogUtil.logToConsole("TABLE: " + type + | 175 LogUtil.logToConsole("TABLE: " + type + |
162 ", id=" + e.getId() + | 176 ", id=" + e.getId() + |
163 ", class=" + e.getClassName() + | 177 ", class=" + e.getClassName() + |
164 ", parent=[" + parent.getTagName() + | 178 ", parent=[" + parent.getTagName() + |
165 ", id=" + parent.getId() + | 179 ", id=" + parent.getId() + |
166 ", class=" + parent.getClassName() + | 180 ", class=" + parent.getClassName() + |
167 "]"); | 181 "]"); |
168 } | 182 } |
169 } | 183 } |
OLD | NEW |