Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
| 6 | 6 |
| 7 import org.chromium.distiller.DomUtil; | 7 import org.chromium.distiller.DomUtil; |
| 8 import org.chromium.distiller.DomWalker; | 8 import org.chromium.distiller.DomWalker; |
| 9 import org.chromium.distiller.LogUtil; | 9 import org.chromium.distiller.LogUtil; |
| 10 import org.chromium.distiller.TableClassifier; | 10 import org.chromium.distiller.TableClassifier; |
| 11 | 11 |
| 12 import com.google.gwt.dom.client.Element; | 12 import com.google.gwt.dom.client.Element; |
| 13 import com.google.gwt.dom.client.Node; | 13 import com.google.gwt.dom.client.Node; |
| 14 import com.google.gwt.dom.client.Style; | 14 import com.google.gwt.dom.client.Style; |
| 15 import com.google.gwt.dom.client.TableElement; | 15 import com.google.gwt.dom.client.TableElement; |
| 16 import com.google.gwt.dom.client.Text; | 16 import com.google.gwt.dom.client.Text; |
| 17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; | 17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; |
| 18 import org.chromium.distiller.extractors.embeds.ImageExtractor; | 18 import org.chromium.distiller.extractors.embeds.ImageExtractor; |
| 19 import org.chromium.distiller.extractors.embeds.TwitterExtractor; | 19 import org.chromium.distiller.extractors.embeds.TwitterExtractor; |
| 20 import org.chromium.distiller.extractors.embeds.VimeoExtractor; | 20 import org.chromium.distiller.extractors.embeds.VimeoExtractor; |
| 21 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; | 21 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; |
| 22 | |
| 23 import java.util.ArrayList; | 22 import java.util.ArrayList; |
| 24 import java.util.HashSet; | 23 import java.util.HashSet; |
| 25 import java.util.List; | 24 import java.util.List; |
| 26 import java.util.Set; | 25 import java.util.Set; |
| 27 | 26 |
| 28 /** | 27 /** |
| 29 * This DomWalker.Visitor creates a WebDocument from the walked DOM. It skips hidden and other | 28 * This DomWalker.Visitor creates a WebDocument from the walked DOM. It skips hidden and other |
| 30 * elements that should not be in the created document. Some of these skipped elements (hidden | 29 * elements that should not be in the created document. Some of these skipped elements (hidden |
| 31 * elements and data tables) are available for retrieval after processing. | 30 * elements and data tables) are available for retrieval after processing. |
| 32 */ | 31 */ |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 93 for (EmbedExtractor extractor : extractors) { | 92 for (EmbedExtractor extractor : extractors) { |
| 94 WebElement embed = extractor.extract(e); | 93 WebElement embed = extractor.extract(e); |
| 95 if (embed != null) { | 94 if (embed != null) { |
| 96 builder.embed(embed); | 95 builder.embed(embed); |
| 97 return false; | 96 return false; |
| 98 } | 97 } |
| 99 } | 98 } |
| 100 } | 99 } |
| 101 | 100 |
| 102 switch (e.getTagName()) { | 101 switch (e.getTagName()) { |
| 102 case "OL": | |
| 103 builder.embed(new OLStart()); | |
|
mdjones
2015/07/29 17:12:16
I think using the "embed" method here is a bit amb
| |
| 104 break; | |
| 105 case "UL": | |
| 106 builder.embed(new ULStart()); | |
| 107 break; | |
| 108 case "LI": | |
| 109 builder.embed(new LIStart()); | |
| 110 break; | |
| 103 case "BR": | 111 case "BR": |
| 104 builder.lineBreak(e); | 112 builder.lineBreak(e); |
| 105 return false; | 113 return false; |
| 106 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder | 114 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder |
| 107 // later. | 115 // later. |
| 108 case "TABLE": | 116 case "TABLE": |
| 109 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); | 117 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); |
| 110 logTableInfo(e, type); | 118 logTableInfo(e, type); |
| 111 if (type == TableClassifier.Type.DATA) { | 119 if (type == TableClassifier.Type.DATA) { |
| 112 builder.dataTable(e); | 120 builder.dataTable(e); |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 135 case "LINK": | 143 case "LINK": |
| 136 case "NOSCRIPT": | 144 case "NOSCRIPT": |
| 137 return false; | 145 return false; |
| 138 } | 146 } |
| 139 builder.startElement(e); | 147 builder.startElement(e); |
| 140 return true; | 148 return true; |
| 141 } | 149 } |
| 142 | 150 |
| 143 @Override | 151 @Override |
| 144 public void exit(Node n) { | 152 public void exit(Node n) { |
| 153 if (n.getNodeType() == Node.ELEMENT_NODE) { | |
| 154 Element e = Element.as(n); | |
| 155 switch (e.getTagName()) { | |
| 156 case "OL": | |
| 157 builder.embed(new OLEnd()); | |
| 158 break; | |
| 159 case "UL": | |
| 160 builder.embed(new ULEnd()); | |
| 161 break; | |
| 162 case "LI": | |
| 163 builder.embed(new LIEnd()); | |
| 164 break; | |
| 165 } | |
| 166 } | |
| 145 builder.endElement(); | 167 builder.endElement(); |
| 146 } | 168 } |
| 147 | 169 |
| 148 private static void logVisibilityInfo(Element e, boolean visible) { | 170 private static void logVisibilityInfo(Element e, boolean visible) { |
| 149 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; | 171 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; |
| 150 Style style = DomUtil.getComputedStyle(e); | 172 Style style = DomUtil.getComputedStyle(e); |
| 151 LogUtil.logToConsole((visible ? "KEEP " : "SKIP ") + e.getTagName() + | 173 LogUtil.logToConsole((visible ? "KEEP " : "SKIP ") + e.getTagName() + |
| 152 ": id=" + e.getId() + | 174 ": id=" + e.getId() + |
| 153 ", dsp=" + style.getDisplay() + | 175 ", dsp=" + style.getDisplay() + |
| 154 ", vis=" + style.getVisibility() + | 176 ", vis=" + style.getVisibility() + |
| 155 ", opaq=" + style.getOpacity()); | 177 ", opaq=" + style.getOpacity()); |
| 156 } | 178 } |
| 157 | 179 |
| 158 private static void logTableInfo(Element e, TableClassifier.Type type) { | 180 private static void logTableInfo(Element e, TableClassifier.Type type) { |
| 159 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; | 181 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; |
| 160 Element parent = e.getParentElement(); | 182 Element parent = e.getParentElement(); |
| 161 LogUtil.logToConsole("TABLE: " + type + | 183 LogUtil.logToConsole("TABLE: " + type + |
| 162 ", id=" + e.getId() + | 184 ", id=" + e.getId() + |
| 163 ", class=" + e.getClassName() + | 185 ", class=" + e.getClassName() + |
| 164 ", parent=[" + parent.getTagName() + | 186 ", parent=[" + parent.getTagName() + |
| 165 ", id=" + parent.getId() + | 187 ", id=" + parent.getId() + |
| 166 ", class=" + parent.getClassName() + | 188 ", class=" + parent.getClassName() + |
| 167 "]"); | 189 "]"); |
| 168 } | 190 } |
| 169 } | 191 } |
| OLD | NEW |