Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(90)

Side by Side Diff: java/org/chromium/distiller/webdocument/DomConverter.java

Issue 2267403008: Fix partially hidden article (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « java/org/chromium/distiller/ContentExtractor.java ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller.webdocument; 5 package org.chromium.distiller.webdocument;
6 6
7 import org.chromium.distiller.DomUtil; 7 import org.chromium.distiller.DomUtil;
8 import org.chromium.distiller.DomWalker; 8 import org.chromium.distiller.DomWalker;
9 import org.chromium.distiller.JavaScript;
9 import org.chromium.distiller.LogUtil; 10 import org.chromium.distiller.LogUtil;
10 import org.chromium.distiller.TableClassifier; 11 import org.chromium.distiller.TableClassifier;
11 12
12 import com.google.gwt.dom.client.Element; 13 import com.google.gwt.dom.client.Element;
13 import com.google.gwt.dom.client.Node; 14 import com.google.gwt.dom.client.Node;
14 import com.google.gwt.dom.client.Style; 15 import com.google.gwt.dom.client.Style;
15 import com.google.gwt.dom.client.TableElement; 16 import com.google.gwt.dom.client.TableElement;
16 import com.google.gwt.dom.client.Text; 17 import com.google.gwt.dom.client.Text;
17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor;
18 import org.chromium.distiller.extractors.embeds.ImageExtractor; 19 import org.chromium.distiller.extractors.embeds.ImageExtractor;
(...skipping 11 matching lines...) Expand all
30 * elements that should not be in the created document. Some of these skipped elements (hidden 31 * elements that should not be in the created document. Some of these skipped elements (hidden
31 * elements and data tables) are available for retrieval after processing. 32 * elements and data tables) are available for retrieval after processing.
32 */ 33 */
33 public class DomConverter implements DomWalker.Visitor { 34 public class DomConverter implements DomWalker.Visitor {
34 private final WebDocumentBuilderInterface builder; 35 private final WebDocumentBuilderInterface builder;
35 private final Set<Node> hiddenElements; 36 private final Set<Node> hiddenElements;
36 private final List<EmbedExtractor> extractors; 37 private final List<EmbedExtractor> extractors;
37 // For quick lookup of tags that could possibly be embeds. 38 // For quick lookup of tags that could possibly be embeds.
38 private final HashSet<String> embedTagNames; 39 private final HashSet<String> embedTagNames;
39 40
41 private boolean isMobileFriendly;
42 private Element articleElement;
43
40 public DomConverter(WebDocumentBuilderInterface builder) { 44 public DomConverter(WebDocumentBuilderInterface builder) {
41 hiddenElements = new HashSet<Node>(); 45 hiddenElements = new HashSet<>();
42 this.builder = builder; 46 this.builder = builder;
43 47
44 extractors = new ArrayList<EmbedExtractor>(); 48 extractors = new ArrayList<>();
45 extractors.add(new ImageExtractor()); 49 extractors.add(new ImageExtractor());
46 extractors.add(new TwitterExtractor()); 50 extractors.add(new TwitterExtractor());
47 extractors.add(new VimeoExtractor()); 51 extractors.add(new VimeoExtractor());
48 extractors.add(new YouTubeExtractor()); 52 extractors.add(new YouTubeExtractor());
49 53
50 embedTagNames = new HashSet<>(); 54 embedTagNames = new HashSet<>();
51 for (EmbedExtractor extractor : extractors) { 55 for (EmbedExtractor extractor : extractors) {
52 embedTagNames.addAll(extractor.getRelevantTagNames()); 56 embedTagNames.addAll(extractor.getRelevantTagNames());
53 } 57 }
54 } 58 }
55 59
60 public void setIsMobileFriendly(boolean mobileFriendly) {
61 isMobileFriendly = mobileFriendly;
62 }
63
64 public void setArticleElement(Element article) {
65 articleElement = article;
66 }
67
56 public final Set<Node> getHiddenElements() { 68 public final Set<Node> getHiddenElements() {
57 return hiddenElements; 69 return hiddenElements;
58 } 70 }
59 71
60 @Override 72 @Override
61 public void skip(Element e) { 73 public void skip(Element e) {
62 builder.skipElement(e); 74 builder.skipElement(e);
63 } 75 }
64 76
65 @Override 77 @Override
66 public boolean visit(Node n) { 78 public boolean visit(Node n) {
67 switch (n.getNodeType()) { 79 switch (n.getNodeType()) {
68 case Node.TEXT_NODE: 80 case Node.TEXT_NODE:
69 builder.textNode(Text.as(n)); 81 builder.textNode(Text.as(n));
70 return false; 82 return false;
71 case Node.ELEMENT_NODE: 83 case Node.ELEMENT_NODE:
72 return visitElement(Element.as(n)); 84 return visitElement(Element.as(n));
73 default: 85 default:
74 return false; 86 return false;
75 } 87 }
76 } 88 }
77 89
78 private boolean visitElement(Element e) { 90 private boolean visitElement(Element e) {
79 // Skip invisible or uninteresting elements. 91 // Skip invisible or uninteresting elements.
80 boolean visible = DomUtil.isVisible(e); 92 boolean visible = DomUtil.isVisible(e);
81 logVisibilityInfo(e, visible); 93 boolean keepAnyway = false;
82 if (!visible) { 94 if (!visible) {
95 if (isMobileFriendly && articleElement != null && JavaScript.contains(articleElement, e)
96 && DomUtil.hasClassName(e, "hidden")) {
mdjones 2016/08/24 22:45:51 Do you think it is possible/worth while to isolate
97 // Process more hidden elements in a marked article in mobile-friendly pages
98 // because some sites hide the lower part of the article.
99 // See crbug.com/599121
100 keepAnyway = true;
101 }
102 }
103 logVisibilityInfo(e, visible || keepAnyway);
104 if (!visible && !keepAnyway) {
83 hiddenElements.add(e); 105 hiddenElements.add(e);
84 return false; 106 return false;
85 } 107 }
86 108
87 // Node-type specific extractors check for elements they are interested in here. Everything 109 // Node-type specific extractors check for elements they are interested in here. Everything
88 // else will be filtered through the switch below. 110 // else will be filtered through the switch below.
89 111
90 // Check for embedded elements that might be extracted. 112 // Check for embedded elements that might be extracted.
91 if (embedTagNames.contains(e.getTagName())) { 113 if (embedTagNames.contains(e.getTagName())) {
92 // If the tag is marked as interesting, check the extractors. 114 // If the tag is marked as interesting, check the extractors.
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
171 Element parent = e.getParentElement(); 193 Element parent = e.getParentElement();
172 LogUtil.logToConsole("TABLE: " + type + 194 LogUtil.logToConsole("TABLE: " + type +
173 ", id=" + e.getId() + 195 ", id=" + e.getId() +
174 ", class=" + e.getClassName() + 196 ", class=" + e.getClassName() +
175 ", parent=[" + parent.getTagName() + 197 ", parent=[" + parent.getTagName() +
176 ", id=" + parent.getId() + 198 ", id=" + parent.getId() +
177 ", class=" + parent.getClassName() + 199 ", class=" + parent.getClassName() +
178 "]"); 200 "]");
179 } 201 }
180 } 202 }
OLDNEW
« no previous file with comments | « java/org/chromium/distiller/ContentExtractor.java ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698