OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package com.dom_distiller.client; | 5 package com.dom_distiller.client; |
6 | 6 |
7 import com.google.gwt.dom.client.AnchorElement; | 7 import com.google.gwt.dom.client.AnchorElement; |
8 import com.google.gwt.dom.client.Document; | 8 import com.google.gwt.dom.client.Document; |
9 import com.google.gwt.dom.client.Element; | 9 import com.google.gwt.dom.client.Element; |
10 import com.google.gwt.dom.client.ImageElement; | 10 import com.google.gwt.dom.client.ImageElement; |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
66 List<Node> contentNodes = getContentNodesForTextDocument(document, textNodes); | 66 List<Node> contentNodes = getContentNodesForTextDocument(document, textNodes); |
67 | 67 |
68 List<Node> contentAndImages = RelevantImageFinder.findAndAddImages( | 68 List<Node> contentAndImages = RelevantImageFinder.findAndAddImages( |
69 contentNodes, Document.get().getDocumentElement()); | 69 contentNodes, Document.get().getDocumentElement()); |
70 | 70 |
71 if (contentAndImages.isEmpty()) { | 71 if (contentAndImages.isEmpty()) { |
72 return ""; | 72 return ""; |
73 } | 73 } |
74 | 74 |
75 Node clonedSubtree = NodeListExpander.expand(contentAndImages).cloneSubtree(); | 75 Node clonedSubtree = NodeListExpander.expand(contentAndImages).cloneSubtree(); |
76 | 76 LogUtil.logToConsole(clonedSubtree.getNodeType() + " node:" + Node.ELEMENT_NODE); |
cjhopman
2014/05/22 00:27:21
probably don't need this. If it's been useful, at
Yaron
2014/05/22 17:05:24
err right that was supposed to go. I was trying to
| |
77 if (clonedSubtree.getNodeType() != Node.ELEMENT_NODE) { | 77 if (clonedSubtree.getNodeType() != Node.ELEMENT_NODE) { |
78 return ""; | 78 return ""; |
79 } | 79 } |
80 | 80 |
81 // The base URL in the distilled page viewer is different from that in | 81 // The base URL in the distilled page viewer is different from that in |
82 // the live page. This breaks all relative links (in anchors and | 82 // the live page. This breaks all relative links (in anchors and |
83 // images), so make them absolute in the distilled content. | 83 // images), so make them absolute in the distilled content. |
84 makeAllLinksAbsolute(clonedSubtree); | 84 makeAllLinksAbsolute(clonedSubtree); |
85 | 85 |
86 // TODO(cjhopman): this discards the top element and just returns its children. This might | 86 // TODO(cjhopman): this discards the top element and just returns its children. This might |
87 // break in some cases. | 87 // break in some cases. |
88 return Element.as(clonedSubtree).getInnerHTML(); | 88 return Element.as(clonedSubtree).getInnerHTML(); |
89 } | 89 } |
90 | 90 |
91 private static List<Node> parse(Element e, ContentHandler handler) { | 91 private static List<Node> parse(Element e, ContentHandler handler) { |
92 DomToSaxVisitor domToSaxVisitor = new DomToSaxVisitor(handler); | 92 DomToSaxVisitor domToSaxVisitor = new DomToSaxVisitor(handler); |
93 FilteringDomVisitor filteringDomVisitor = new FilteringDomVisitor(domToSaxVisitor); | 93 FilteringDomVisitor filteringDomVisitor = new FilteringDomVisitor(domToSaxVisitor); |
94 new DomWalker(filteringDomVisitor).walk(e); | 94 new DomWalker(filteringDomVisitor).walk(e); |
95 return domToSaxVisitor.getTextNodes(); | 95 return domToSaxVisitor.getTextNodes(); |
96 } | 96 } |
97 | 97 |
98 private static List<Node> getContentNodesForTextDocument( | 98 private static List<Node> getContentNodesForTextDocument( |
99 TextDocument document, List<Node> textNodes) { | 99 TextDocument document, List<Node> textNodes) { |
100 List<Integer> contentTextIndexes = new ArrayList<Integer>(); | 100 List<Integer> contentTextIndexes = new ArrayList<Integer>(); |
101 for (TextBlock tb : document.getTextBlocks()) { | 101 for (TextBlock tb : document.getTextBlocks()) { |
102 if (!tb.isContent()) { | |
103 continue; | |
104 } | |
102 if (!tb.hasLabel(DefaultLabels.TITLE)) { | 105 if (!tb.hasLabel(DefaultLabels.TITLE)) { |
103 contentTextIndexes.addAll(tb.getContainedTextElements()); | 106 contentTextIndexes.addAll(tb.getContainedTextElements()); |
104 } | 107 } |
105 } | 108 } |
106 Collections.sort(contentTextIndexes); | 109 Collections.sort(contentTextIndexes); |
107 | 110 |
108 // Boilerpipe's text node indexes start at 1. | 111 // Boilerpipe's text node indexes start at 1. |
109 List<Node> contentNodes = new ArrayList<Node>(contentTextIndexes.size()); | 112 List<Node> contentNodes = new ArrayList<Node>(contentTextIndexes.size()); |
110 for (Integer i : contentTextIndexes) { | 113 for (Integer i : contentTextIndexes) { |
111 contentNodes.add(textNodes.get(i - 1)); | 114 contentNodes.add(textNodes.get(i - 1)); |
(...skipping 13 matching lines...) Expand all Loading... | |
125 link.setHref(link.getHref()); | 128 link.setHref(link.getHref()); |
126 } | 129 } |
127 | 130 |
128 NodeList<Element> allImages = root.getElementsByTagName("IMG"); | 131 NodeList<Element> allImages = root.getElementsByTagName("IMG"); |
129 for (int i = 0; i < allImages.getLength(); i++) { | 132 for (int i = 0; i < allImages.getLength(); i++) { |
130 ImageElement image = ImageElement.as(allImages.getItem(i)); | 133 ImageElement image = ImageElement.as(allImages.getItem(i)); |
131 image.setSrc(image.getSrc()); | 134 image.setSrc(image.getSrc()); |
132 } | 135 } |
133 } | 136 } |
134 } | 137 } |
OLD | NEW |