OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
6 | 6 |
7 import org.chromium.distiller.DomUtil; | 7 import org.chromium.distiller.DomUtil; |
8 import org.chromium.distiller.DomWalker; | 8 import org.chromium.distiller.DomWalker; |
9 import org.chromium.distiller.JavaScript; | |
9 import org.chromium.distiller.LogUtil; | 10 import org.chromium.distiller.LogUtil; |
10 import org.chromium.distiller.TableClassifier; | 11 import org.chromium.distiller.TableClassifier; |
11 | 12 |
12 import com.google.gwt.dom.client.Element; | 13 import com.google.gwt.dom.client.Element; |
13 import com.google.gwt.dom.client.Node; | 14 import com.google.gwt.dom.client.Node; |
14 import com.google.gwt.dom.client.Style; | 15 import com.google.gwt.dom.client.Style; |
15 import com.google.gwt.dom.client.TableElement; | 16 import com.google.gwt.dom.client.TableElement; |
16 import com.google.gwt.dom.client.Text; | 17 import com.google.gwt.dom.client.Text; |
17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; | 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; |
18 import org.chromium.distiller.extractors.embeds.ImageExtractor; | 19 import org.chromium.distiller.extractors.embeds.ImageExtractor; |
(...skipping 11 matching lines...) Expand all Loading... | |
30 * elements that should not be in the created document. Some of these skipped elements (hidden | 31 * elements that should not be in the created document. Some of these skipped elements (hidden |
31 * elements and data tables) are available for retrieval after processing. | 32 * elements and data tables) are available for retrieval after processing. |
32 */ | 33 */ |
33 public class DomConverter implements DomWalker.Visitor { | 34 public class DomConverter implements DomWalker.Visitor { |
34 private final WebDocumentBuilderInterface builder; | 35 private final WebDocumentBuilderInterface builder; |
35 private final Set<Node> hiddenElements; | 36 private final Set<Node> hiddenElements; |
36 private final List<EmbedExtractor> extractors; | 37 private final List<EmbedExtractor> extractors; |
37 // For quick lookup of tags that could possibly be embeds. | 38 // For quick lookup of tags that could possibly be embeds. |
38 private final HashSet<String> embedTagNames; | 39 private final HashSet<String> embedTagNames; |
39 | 40 |
41 private boolean isMobileFriendly; | |
42 private Element articleElement; | |
43 | |
40 public DomConverter(WebDocumentBuilderInterface builder) { | 44 public DomConverter(WebDocumentBuilderInterface builder) { |
41 hiddenElements = new HashSet<Node>(); | 45 hiddenElements = new HashSet<>(); |
42 this.builder = builder; | 46 this.builder = builder; |
43 | 47 |
44 extractors = new ArrayList<EmbedExtractor>(); | 48 extractors = new ArrayList<>(); |
45 extractors.add(new ImageExtractor()); | 49 extractors.add(new ImageExtractor()); |
46 extractors.add(new TwitterExtractor()); | 50 extractors.add(new TwitterExtractor()); |
47 extractors.add(new VimeoExtractor()); | 51 extractors.add(new VimeoExtractor()); |
48 extractors.add(new YouTubeExtractor()); | 52 extractors.add(new YouTubeExtractor()); |
49 | 53 |
50 embedTagNames = new HashSet<>(); | 54 embedTagNames = new HashSet<>(); |
51 for (EmbedExtractor extractor : extractors) { | 55 for (EmbedExtractor extractor : extractors) { |
52 embedTagNames.addAll(extractor.getRelevantTagNames()); | 56 embedTagNames.addAll(extractor.getRelevantTagNames()); |
53 } | 57 } |
54 } | 58 } |
55 | 59 |
60 public void setIsMobileFriendly(boolean mobileFriendly) { | |
61 isMobileFriendly = mobileFriendly; | |
62 } | |
63 | |
64 public void setArticleElement(Element article) { | |
65 articleElement = article; | |
66 } | |
67 | |
56 public final Set<Node> getHiddenElements() { | 68 public final Set<Node> getHiddenElements() { |
57 return hiddenElements; | 69 return hiddenElements; |
58 } | 70 } |
59 | 71 |
60 @Override | 72 @Override |
61 public void skip(Element e) { | 73 public void skip(Element e) { |
62 builder.skipElement(e); | 74 builder.skipElement(e); |
63 } | 75 } |
64 | 76 |
65 @Override | 77 @Override |
66 public boolean visit(Node n) { | 78 public boolean visit(Node n) { |
67 switch (n.getNodeType()) { | 79 switch (n.getNodeType()) { |
68 case Node.TEXT_NODE: | 80 case Node.TEXT_NODE: |
69 builder.textNode(Text.as(n)); | 81 builder.textNode(Text.as(n)); |
70 return false; | 82 return false; |
71 case Node.ELEMENT_NODE: | 83 case Node.ELEMENT_NODE: |
72 return visitElement(Element.as(n)); | 84 return visitElement(Element.as(n)); |
73 default: | 85 default: |
74 return false; | 86 return false; |
75 } | 87 } |
76 } | 88 } |
77 | 89 |
78 private boolean visitElement(Element e) { | 90 private boolean visitElement(Element e) { |
79 // Skip invisible or uninteresting elements. | 91 // Skip invisible or uninteresting elements. |
80 boolean visible = DomUtil.isVisible(e); | 92 boolean visible = DomUtil.isVisible(e); |
81 logVisibilityInfo(e, visible); | 93 boolean keepAnyway = false; |
82 if (!visible) { | 94 if (!visible) { |
95 if (isMobileFriendly && articleElement != null && JavaScript.contains(articleElement, e) | |
96 && DomUtil.hasClassName(e, "hidden")) { | |
mdjones
2016/08/24 22:45:51
Do you think it is possible/worth while to isolate
| |
97 // Process more hidden elements in a marked article in mobile-friendly pages | |
98 // because some sites hide the lower part of the article. | |
99 // See crbug.com/599121 | |
100 keepAnyway = true; | |
101 } | |
102 } | |
103 logVisibilityInfo(e, visible || keepAnyway); | |
104 if (!visible && !keepAnyway) { | |
83 hiddenElements.add(e); | 105 hiddenElements.add(e); |
84 return false; | 106 return false; |
85 } | 107 } |
86 | 108 |
87 // Node-type specific extractors check for elements they are interested in here. Everything | 109 // Node-type specific extractors check for elements they are interested in here. Everything |
88 // else will be filtered through the switch below. | 110 // else will be filtered through the switch below. |
89 | 111 |
90 // Check for embedded elements that might be extracted. | 112 // Check for embedded elements that might be extracted. |
91 if (embedTagNames.contains(e.getTagName())) { | 113 if (embedTagNames.contains(e.getTagName())) { |
92 // If the tag is marked as interesting, check the extractors. | 114 // If the tag is marked as interesting, check the extractors. |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
171 Element parent = e.getParentElement(); | 193 Element parent = e.getParentElement(); |
172 LogUtil.logToConsole("TABLE: " + type + | 194 LogUtil.logToConsole("TABLE: " + type + |
173 ", id=" + e.getId() + | 195 ", id=" + e.getId() + |
174 ", class=" + e.getClassName() + | 196 ", class=" + e.getClassName() + |
175 ", parent=[" + parent.getTagName() + | 197 ", parent=[" + parent.getTagName() + |
176 ", id=" + parent.getId() + | 198 ", id=" + parent.getId() + |
177 ", class=" + parent.getClassName() + | 199 ", class=" + parent.getClassName() + |
178 "]"); | 200 "]"); |
179 } | 201 } |
180 } | 202 } |
OLD | NEW |