Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
| 6 | 6 |
| 7 import org.chromium.distiller.DomUtil; | 7 import org.chromium.distiller.DomUtil; |
| 8 import org.chromium.distiller.DomWalker; | 8 import org.chromium.distiller.DomWalker; |
| 9 import org.chromium.distiller.JavaScript; | |
| 9 import org.chromium.distiller.LogUtil; | 10 import org.chromium.distiller.LogUtil; |
| 10 import org.chromium.distiller.TableClassifier; | 11 import org.chromium.distiller.TableClassifier; |
| 11 | 12 |
| 12 import com.google.gwt.dom.client.Element; | 13 import com.google.gwt.dom.client.Element; |
| 13 import com.google.gwt.dom.client.Node; | 14 import com.google.gwt.dom.client.Node; |
| 14 import com.google.gwt.dom.client.Style; | 15 import com.google.gwt.dom.client.Style; |
| 15 import com.google.gwt.dom.client.TableElement; | 16 import com.google.gwt.dom.client.TableElement; |
| 16 import com.google.gwt.dom.client.Text; | 17 import com.google.gwt.dom.client.Text; |
| 17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; | 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; |
| 18 import org.chromium.distiller.extractors.embeds.ImageExtractor; | 19 import org.chromium.distiller.extractors.embeds.ImageExtractor; |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 30 * elements that should not be in the created document. Some of these skipped elements (hidden | 31 * elements that should not be in the created document. Some of these skipped elements (hidden |
| 31 * elements and data tables) are available for retrieval after processing. | 32 * elements and data tables) are available for retrieval after processing. |
| 32 */ | 33 */ |
| 33 public class DomConverter implements DomWalker.Visitor { | 34 public class DomConverter implements DomWalker.Visitor { |
| 34 private final WebDocumentBuilderInterface builder; | 35 private final WebDocumentBuilderInterface builder; |
| 35 private final Set<Node> hiddenElements; | 36 private final Set<Node> hiddenElements; |
| 36 private final List<EmbedExtractor> extractors; | 37 private final List<EmbedExtractor> extractors; |
| 37 // For quick lookup of tags that could possibly be embeds. | 38 // For quick lookup of tags that could possibly be embeds. |
| 38 private final HashSet<String> embedTagNames; | 39 private final HashSet<String> embedTagNames; |
| 39 | 40 |
| 41 private boolean isMobileFriendly; | |
| 42 private Element articleElement; | |
| 43 | |
| 40 public DomConverter(WebDocumentBuilderInterface builder) { | 44 public DomConverter(WebDocumentBuilderInterface builder) { |
| 41 hiddenElements = new HashSet<Node>(); | 45 hiddenElements = new HashSet<>(); |
| 42 this.builder = builder; | 46 this.builder = builder; |
| 43 | 47 |
| 44 extractors = new ArrayList<EmbedExtractor>(); | 48 extractors = new ArrayList<>(); |
| 45 extractors.add(new ImageExtractor()); | 49 extractors.add(new ImageExtractor()); |
| 46 extractors.add(new TwitterExtractor()); | 50 extractors.add(new TwitterExtractor()); |
| 47 extractors.add(new VimeoExtractor()); | 51 extractors.add(new VimeoExtractor()); |
| 48 extractors.add(new YouTubeExtractor()); | 52 extractors.add(new YouTubeExtractor()); |
| 49 | 53 |
| 50 embedTagNames = new HashSet<>(); | 54 embedTagNames = new HashSet<>(); |
| 51 for (EmbedExtractor extractor : extractors) { | 55 for (EmbedExtractor extractor : extractors) { |
| 52 embedTagNames.addAll(extractor.getRelevantTagNames()); | 56 embedTagNames.addAll(extractor.getRelevantTagNames()); |
| 53 } | 57 } |
| 54 } | 58 } |
| 55 | 59 |
| 60 public void setIsMobileFriendly(boolean mobileFriendly) { | |
| 61 isMobileFriendly = mobileFriendly; | |
| 62 } | |
| 63 | |
| 64 public void setArticleElement(Element article) { | |
| 65 articleElement = article; | |
| 66 } | |
| 67 | |
| 56 public final Set<Node> getHiddenElements() { | 68 public final Set<Node> getHiddenElements() { |
| 57 return hiddenElements; | 69 return hiddenElements; |
| 58 } | 70 } |
| 59 | 71 |
| 60 @Override | 72 @Override |
| 61 public void skip(Element e) { | 73 public void skip(Element e) { |
| 62 builder.skipElement(e); | 74 builder.skipElement(e); |
| 63 } | 75 } |
| 64 | 76 |
| 65 @Override | 77 @Override |
| 66 public boolean visit(Node n) { | 78 public boolean visit(Node n) { |
| 67 switch (n.getNodeType()) { | 79 switch (n.getNodeType()) { |
| 68 case Node.TEXT_NODE: | 80 case Node.TEXT_NODE: |
| 69 builder.textNode(Text.as(n)); | 81 builder.textNode(Text.as(n)); |
| 70 return false; | 82 return false; |
| 71 case Node.ELEMENT_NODE: | 83 case Node.ELEMENT_NODE: |
| 72 return visitElement(Element.as(n)); | 84 return visitElement(Element.as(n)); |
| 73 default: | 85 default: |
| 74 return false; | 86 return false; |
| 75 } | 87 } |
| 76 } | 88 } |
| 77 | 89 |
| 78 private boolean visitElement(Element e) { | 90 private boolean visitElement(Element e) { |
| 79 // Skip invisible or uninteresting elements. | 91 // Skip invisible or uninteresting elements. |
| 80 boolean visible = DomUtil.isVisible(e); | 92 boolean visible = DomUtil.isVisible(e); |
| 81 logVisibilityInfo(e, visible); | 93 boolean keepAnyway = false; |
| 82 if (!visible) { | 94 if (!visible) { |
| 95 if (isMobileFriendly && articleElement != null && JavaScript.contains(articleElement, e) | |
| 96 && DomUtil.hasClassName(e, "hidden")) { | |
|
mdjones
2016/08/24 22:45:51
Do you think it is possible/worth while to isolate
| |
| 97 // Process more hidden elements in a marked article in mobile-friendly pages | |
| 98 // because some sites hide the lower part of the article. | |
| 99 // See crbug.com/599121 | |
| 100 keepAnyway = true; | |
| 101 } | |
| 102 } | |
| 103 logVisibilityInfo(e, visible || keepAnyway); | |
| 104 if (!visible && !keepAnyway) { | |
| 83 hiddenElements.add(e); | 105 hiddenElements.add(e); |
| 84 return false; | 106 return false; |
| 85 } | 107 } |
| 86 | 108 |
| 87 // Node-type specific extractors check for elements they are interested in here. Everything | 109 // Node-type specific extractors check for elements they are interested in here. Everything |
| 88 // else will be filtered through the switch below. | 110 // else will be filtered through the switch below. |
| 89 | 111 |
| 90 // Check for embedded elements that might be extracted. | 112 // Check for embedded elements that might be extracted. |
| 91 if (embedTagNames.contains(e.getTagName())) { | 113 if (embedTagNames.contains(e.getTagName())) { |
| 92 // If the tag is marked as interesting, check the extractors. | 114 // If the tag is marked as interesting, check the extractors. |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 171 Element parent = e.getParentElement(); | 193 Element parent = e.getParentElement(); |
| 172 LogUtil.logToConsole("TABLE: " + type + | 194 LogUtil.logToConsole("TABLE: " + type + |
| 173 ", id=" + e.getId() + | 195 ", id=" + e.getId() + |
| 174 ", class=" + e.getClassName() + | 196 ", class=" + e.getClassName() + |
| 175 ", parent=[" + parent.getTagName() + | 197 ", parent=[" + parent.getTagName() + |
| 176 ", id=" + parent.getId() + | 198 ", id=" + parent.getId() + |
| 177 ", class=" + parent.getClassName() + | 199 ", class=" + parent.getClassName() + |
| 178 "]"); | 200 "]"); |
| 179 } | 201 } |
| 180 } | 202 } |
| OLD | NEW |