OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package com.dom_distiller.client; | 5 package com.dom_distiller.client; |
6 | 6 |
7 import com.google.gwt.dom.client.AnchorElement; | 7 import com.google.gwt.dom.client.AnchorElement; |
8 import com.google.gwt.dom.client.Document; | 8 import com.google.gwt.dom.client.Document; |
9 import com.google.gwt.dom.client.Element; | 9 import com.google.gwt.dom.client.Element; |
10 import com.google.gwt.dom.client.ImageElement; | 10 import com.google.gwt.dom.client.ImageElement; |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
66 List<Node> contentNodes = getContentNodesForTextDocument(document, textN
odes); | 66 List<Node> contentNodes = getContentNodesForTextDocument(document, textN
odes); |
67 | 67 |
68 List<Node> contentAndImages = RelevantImageFinder.findAndAddImages( | 68 List<Node> contentAndImages = RelevantImageFinder.findAndAddImages( |
69 contentNodes, Document.get().getDocumentElement()); | 69 contentNodes, Document.get().getDocumentElement()); |
70 | 70 |
71 if (contentAndImages.isEmpty()) { | 71 if (contentAndImages.isEmpty()) { |
72 return ""; | 72 return ""; |
73 } | 73 } |
74 | 74 |
75 Node clonedSubtree = NodeListExpander.expand(contentAndImages).cloneSubt
ree(); | 75 Node clonedSubtree = NodeListExpander.expand(contentAndImages).cloneSubt
ree(); |
76 | |
77 if (clonedSubtree.getNodeType() != Node.ELEMENT_NODE) { | 76 if (clonedSubtree.getNodeType() != Node.ELEMENT_NODE) { |
78 return ""; | 77 return ""; |
79 } | 78 } |
80 | 79 |
81 // The base URL in the distilled page viewer is different from that in | 80 // The base URL in the distilled page viewer is different from that in |
82 // the live page. This breaks all relative links (in anchors and | 81 // the live page. This breaks all relative links (in anchors and |
83 // images), so make them absolute in the distilled content. | 82 // images), so make them absolute in the distilled content. |
84 makeAllLinksAbsolute(clonedSubtree); | 83 makeAllLinksAbsolute(clonedSubtree); |
85 | 84 |
86 // TODO(cjhopman): this discards the top element and just returns its ch
ildren. This might | 85 // TODO(cjhopman): this discards the top element and just returns its ch
ildren. This might |
87 // break in some cases. | 86 // break in some cases. |
88 return Element.as(clonedSubtree).getInnerHTML(); | 87 return Element.as(clonedSubtree).getInnerHTML(); |
89 } | 88 } |
90 | 89 |
91 private static List<Node> parse(Element e, ContentHandler handler) { | 90 private static List<Node> parse(Element e, ContentHandler handler) { |
92 DomToSaxVisitor domToSaxVisitor = new DomToSaxVisitor(handler); | 91 DomToSaxVisitor domToSaxVisitor = new DomToSaxVisitor(handler); |
93 FilteringDomVisitor filteringDomVisitor = new FilteringDomVisitor(domToS
axVisitor); | 92 FilteringDomVisitor filteringDomVisitor = new FilteringDomVisitor(domToS
axVisitor); |
94 new DomWalker(filteringDomVisitor).walk(e); | 93 new DomWalker(filteringDomVisitor).walk(e); |
95 return domToSaxVisitor.getTextNodes(); | 94 return domToSaxVisitor.getTextNodes(); |
96 } | 95 } |
97 | 96 |
98 private static List<Node> getContentNodesForTextDocument( | 97 private static List<Node> getContentNodesForTextDocument( |
99 TextDocument document, List<Node> textNodes) { | 98 TextDocument document, List<Node> textNodes) { |
100 List<Integer> contentTextIndexes = new ArrayList<Integer>(); | 99 List<Integer> contentTextIndexes = new ArrayList<Integer>(); |
101 for (TextBlock tb : document.getTextBlocks()) { | 100 for (TextBlock tb : document.getTextBlocks()) { |
| 101 if (!tb.isContent()) { |
| 102 continue; |
| 103 } |
102 if (!tb.hasLabel(DefaultLabels.TITLE)) { | 104 if (!tb.hasLabel(DefaultLabels.TITLE)) { |
103 contentTextIndexes.addAll(tb.getContainedTextElements()); | 105 contentTextIndexes.addAll(tb.getContainedTextElements()); |
104 } | 106 } |
105 } | 107 } |
106 Collections.sort(contentTextIndexes); | 108 Collections.sort(contentTextIndexes); |
107 | 109 |
108 // Boilerpipe's text node indexes start at 1. | 110 // Boilerpipe's text node indexes start at 1. |
109 List<Node> contentNodes = new ArrayList<Node>(contentTextIndexes.size())
; | 111 List<Node> contentNodes = new ArrayList<Node>(contentTextIndexes.size())
; |
110 for (Integer i : contentTextIndexes) { | 112 for (Integer i : contentTextIndexes) { |
111 contentNodes.add(textNodes.get(i - 1)); | 113 contentNodes.add(textNodes.get(i - 1)); |
(...skipping 13 matching lines...) Expand all Loading... |
125 link.setHref(link.getHref()); | 127 link.setHref(link.getHref()); |
126 } | 128 } |
127 | 129 |
128 NodeList<Element> allImages = root.getElementsByTagName("IMG"); | 130 NodeList<Element> allImages = root.getElementsByTagName("IMG"); |
129 for (int i = 0; i < allImages.getLength(); i++) { | 131 for (int i = 0; i < allImages.getLength(); i++) { |
130 ImageElement image = ImageElement.as(allImages.getItem(i)); | 132 ImageElement image = ImageElement.as(allImages.getItem(i)); |
131 image.setSrc(image.getSrc()); | 133 image.setSrc(image.getSrc()); |
132 } | 134 } |
133 } | 135 } |
134 } | 136 } |
OLD | NEW |