OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
6 | 6 |
7 import org.chromium.distiller.DomUtil; | 7 import org.chromium.distiller.DomUtil; |
8 import org.chromium.distiller.DomWalker; | 8 import org.chromium.distiller.DomWalker; |
| 9 import org.chromium.distiller.JavaScript; |
9 import org.chromium.distiller.LogUtil; | 10 import org.chromium.distiller.LogUtil; |
10 import org.chromium.distiller.TableClassifier; | 11 import org.chromium.distiller.TableClassifier; |
11 | 12 |
12 import com.google.gwt.dom.client.Element; | 13 import com.google.gwt.dom.client.Element; |
13 import com.google.gwt.dom.client.Node; | 14 import com.google.gwt.dom.client.Node; |
14 import com.google.gwt.dom.client.Style; | 15 import com.google.gwt.dom.client.Style; |
15 import com.google.gwt.dom.client.TableElement; | 16 import com.google.gwt.dom.client.TableElement; |
16 import com.google.gwt.dom.client.Text; | 17 import com.google.gwt.dom.client.Text; |
17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; | 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; |
18 import org.chromium.distiller.extractors.embeds.ImageExtractor; | 19 import org.chromium.distiller.extractors.embeds.ImageExtractor; |
(...skipping 11 matching lines...) Expand all Loading... |
30 * elements that should not be in the created document. Some of these skipped elements (hidden | 31 * elements that should not be in the created document. Some of these skipped elements (hidden |
31 * elements and data tables) are available for retrieval after processing. | 32 * elements and data tables) are available for retrieval after processing. |
32 */ | 33 */ |
33 public class DomConverter implements DomWalker.Visitor { | 34 public class DomConverter implements DomWalker.Visitor { |
34 private final WebDocumentBuilderInterface builder; | 35 private final WebDocumentBuilderInterface builder; |
35 private final Set<Node> hiddenElements; | 36 private final Set<Node> hiddenElements; |
36 private final List<EmbedExtractor> extractors; | 37 private final List<EmbedExtractor> extractors; |
37 // For quick lookup of tags that could possibly be embeds. | 38 // For quick lookup of tags that could possibly be embeds. |
38 private final HashSet<String> embedTagNames; | 39 private final HashSet<String> embedTagNames; |
39 | 40 |
| 41 private boolean isMobileFriendly; |
| 42 private boolean hasArticleElement; |
| 43 |
40 public DomConverter(WebDocumentBuilderInterface builder) { | 44 public DomConverter(WebDocumentBuilderInterface builder) { |
41 hiddenElements = new HashSet<Node>(); | 45 hiddenElements = new HashSet<>(); |
42 this.builder = builder; | 46 this.builder = builder; |
43 | 47 |
44 extractors = new ArrayList<EmbedExtractor>(); | 48 extractors = new ArrayList<>(); |
45 extractors.add(new ImageExtractor()); | 49 extractors.add(new ImageExtractor()); |
46 extractors.add(new TwitterExtractor()); | 50 extractors.add(new TwitterExtractor()); |
47 extractors.add(new VimeoExtractor()); | 51 extractors.add(new VimeoExtractor()); |
48 extractors.add(new YouTubeExtractor()); | 52 extractors.add(new YouTubeExtractor()); |
49 | 53 |
50 embedTagNames = new HashSet<>(); | 54 embedTagNames = new HashSet<>(); |
51 for (EmbedExtractor extractor : extractors) { | 55 for (EmbedExtractor extractor : extractors) { |
52 embedTagNames.addAll(extractor.getRelevantTagNames()); | 56 embedTagNames.addAll(extractor.getRelevantTagNames()); |
53 } | 57 } |
54 } | 58 } |
55 | 59 |
| 60 public void setIsMobileFriendly(boolean mobileFriendly) { |
| 61 isMobileFriendly = mobileFriendly; |
| 62 } |
| 63 |
| 64 public void setHasArticleElement(boolean hasArticle) { |
| 65 hasArticleElement = hasArticle; |
| 66 } |
| 67 |
56 public final Set<Node> getHiddenElements() { | 68 public final Set<Node> getHiddenElements() { |
57 return hiddenElements; | 69 return hiddenElements; |
58 } | 70 } |
59 | 71 |
60 @Override | 72 @Override |
61 public void skip(Element e) { | 73 public void skip(Element e) { |
62 builder.skipElement(e); | 74 builder.skipElement(e); |
63 } | 75 } |
64 | 76 |
65 @Override | 77 @Override |
66 public boolean visit(Node n) { | 78 public boolean visit(Node n) { |
67 switch (n.getNodeType()) { | 79 switch (n.getNodeType()) { |
68 case Node.TEXT_NODE: | 80 case Node.TEXT_NODE: |
69 builder.textNode(Text.as(n)); | 81 builder.textNode(Text.as(n)); |
70 return false; | 82 return false; |
71 case Node.ELEMENT_NODE: | 83 case Node.ELEMENT_NODE: |
72 return visitElement(Element.as(n)); | 84 return visitElement(Element.as(n)); |
73 default: | 85 default: |
74 return false; | 86 return false; |
75 } | 87 } |
76 } | 88 } |
77 | 89 |
78 private boolean visitElement(Element e) { | 90 private boolean visitElement(Element e) { |
79 // Skip invisible or uninteresting elements. | 91 // Skip invisible or uninteresting elements. |
80 boolean visible = DomUtil.isVisible(e); | 92 boolean visible = DomUtil.isVisible(e); |
81 logVisibilityInfo(e, visible); | 93 boolean keepAnyway = false; |
82 if (!visible) { | 94 if (!visible) { |
| 95 if (isMobileFriendly && hasArticleElement && DomUtil.hasClassName(e, "hidden")) { |
| 96 // Process more hidden elements in a marked article in mobile-friendly pages |
| 97 // because some sites hide the lower part of the article. |
| 98 // See crbug.com/599121 |
| 99 keepAnyway = true; |
| 100 } |
| 101 } |
| 102 logVisibilityInfo(e, visible || keepAnyway); |
| 103 if (!visible && !keepAnyway) { |
83 hiddenElements.add(e); | 104 hiddenElements.add(e); |
84 return false; | 105 return false; |
85 } | 106 } |
86 | 107 |
87 // Node-type specific extractors check for elements they are interested in here. Everything | 108 // Node-type specific extractors check for elements they are interested in here. Everything |
88 // else will be filtered through the switch below. | 109 // else will be filtered through the switch below. |
89 | 110 |
90 // Check for embedded elements that might be extracted. | 111 // Check for embedded elements that might be extracted. |
91 if (embedTagNames.contains(e.getTagName())) { | 112 if (embedTagNames.contains(e.getTagName())) { |
92 // If the tag is marked as interesting, check the extractors. | 113 // If the tag is marked as interesting, check the extractors. |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
171 Element parent = e.getParentElement(); | 192 Element parent = e.getParentElement(); |
172 LogUtil.logToConsole("TABLE: " + type + | 193 LogUtil.logToConsole("TABLE: " + type + |
173 ", id=" + e.getId() + | 194 ", id=" + e.getId() + |
174 ", class=" + e.getClassName() + | 195 ", class=" + e.getClassName() + |
175 ", parent=[" + parent.getTagName() + | 196 ", parent=[" + parent.getTagName() + |
176 ", id=" + parent.getId() + | 197 ", id=" + parent.getId() + |
177 ", class=" + parent.getClassName() + | 198 ", class=" + parent.getClassName() + |
178 "]"); | 199 "]"); |
179 } | 200 } |
180 } | 201 } |
OLD | NEW |