| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package com.dom_distiller.client; | 5 package com.dom_distiller.client; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.AnchorElement; | 7 import com.google.gwt.dom.client.AnchorElement; |
| 8 import com.google.gwt.dom.client.Document; | 8 import com.google.gwt.dom.client.Document; |
| 9 import com.google.gwt.dom.client.Element; | 9 import com.google.gwt.dom.client.Element; |
| 10 import com.google.gwt.dom.client.ImageElement; | 10 import com.google.gwt.dom.client.ImageElement; |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 66 List<Node> contentNodes = getContentNodesForTextDocument(document, textN
odes); | 66 List<Node> contentNodes = getContentNodesForTextDocument(document, textN
odes); |
| 67 | 67 |
| 68 List<Node> contentAndImages = RelevantImageFinder.findAndAddImages( | 68 List<Node> contentAndImages = RelevantImageFinder.findAndAddImages( |
| 69 contentNodes, Document.get().getDocumentElement()); | 69 contentNodes, Document.get().getDocumentElement()); |
| 70 | 70 |
| 71 if (contentAndImages.isEmpty()) { | 71 if (contentAndImages.isEmpty()) { |
| 72 return ""; | 72 return ""; |
| 73 } | 73 } |
| 74 | 74 |
| 75 Node clonedSubtree = NodeListExpander.expand(contentAndImages).cloneSubt
ree(); | 75 Node clonedSubtree = NodeListExpander.expand(contentAndImages).cloneSubt
ree(); |
| 76 | |
| 77 if (clonedSubtree.getNodeType() != Node.ELEMENT_NODE) { | 76 if (clonedSubtree.getNodeType() != Node.ELEMENT_NODE) { |
| 78 return ""; | 77 return ""; |
| 79 } | 78 } |
| 80 | 79 |
| 81 // The base URL in the distilled page viewer is different from that in | 80 // The base URL in the distilled page viewer is different from that in |
| 82 // the live page. This breaks all relative links (in anchors and | 81 // the live page. This breaks all relative links (in anchors and |
| 83 // images), so make them absolute in the distilled content. | 82 // images), so make them absolute in the distilled content. |
| 84 makeAllLinksAbsolute(clonedSubtree); | 83 makeAllLinksAbsolute(clonedSubtree); |
| 85 | 84 |
| 86 // TODO(cjhopman): this discards the top element and just returns its ch
ildren. This might | 85 // TODO(cjhopman): this discards the top element and just returns its ch
ildren. This might |
| 87 // break in some cases. | 86 // break in some cases. |
| 88 return Element.as(clonedSubtree).getInnerHTML(); | 87 return Element.as(clonedSubtree).getInnerHTML(); |
| 89 } | 88 } |
| 90 | 89 |
| 91 private static List<Node> parse(Element e, ContentHandler handler) { | 90 private static List<Node> parse(Element e, ContentHandler handler) { |
| 92 DomToSaxVisitor domToSaxVisitor = new DomToSaxVisitor(handler); | 91 DomToSaxVisitor domToSaxVisitor = new DomToSaxVisitor(handler); |
| 93 FilteringDomVisitor filteringDomVisitor = new FilteringDomVisitor(domToS
axVisitor); | 92 FilteringDomVisitor filteringDomVisitor = new FilteringDomVisitor(domToS
axVisitor); |
| 94 new DomWalker(filteringDomVisitor).walk(e); | 93 new DomWalker(filteringDomVisitor).walk(e); |
| 95 return domToSaxVisitor.getTextNodes(); | 94 return domToSaxVisitor.getTextNodes(); |
| 96 } | 95 } |
| 97 | 96 |
| 98 private static List<Node> getContentNodesForTextDocument( | 97 private static List<Node> getContentNodesForTextDocument( |
| 99 TextDocument document, List<Node> textNodes) { | 98 TextDocument document, List<Node> textNodes) { |
| 100 List<Integer> contentTextIndexes = new ArrayList<Integer>(); | 99 List<Integer> contentTextIndexes = new ArrayList<Integer>(); |
| 101 for (TextBlock tb : document.getTextBlocks()) { | 100 for (TextBlock tb : document.getTextBlocks()) { |
| 101 if (!tb.isContent()) { |
| 102 continue; |
| 103 } |
| 102 if (!tb.hasLabel(DefaultLabels.TITLE)) { | 104 if (!tb.hasLabel(DefaultLabels.TITLE)) { |
| 103 contentTextIndexes.addAll(tb.getContainedTextElements()); | 105 contentTextIndexes.addAll(tb.getContainedTextElements()); |
| 104 } | 106 } |
| 105 } | 107 } |
| 106 Collections.sort(contentTextIndexes); | 108 Collections.sort(contentTextIndexes); |
| 107 | 109 |
| 108 // Boilerpipe's text node indexes start at 1. | 110 // Boilerpipe's text node indexes start at 1. |
| 109 List<Node> contentNodes = new ArrayList<Node>(contentTextIndexes.size())
; | 111 List<Node> contentNodes = new ArrayList<Node>(contentTextIndexes.size())
; |
| 110 for (Integer i : contentTextIndexes) { | 112 for (Integer i : contentTextIndexes) { |
| 111 contentNodes.add(textNodes.get(i - 1)); | 113 contentNodes.add(textNodes.get(i - 1)); |
| (...skipping 13 matching lines...) Expand all Loading... |
| 125 link.setHref(link.getHref()); | 127 link.setHref(link.getHref()); |
| 126 } | 128 } |
| 127 | 129 |
| 128 NodeList<Element> allImages = root.getElementsByTagName("IMG"); | 130 NodeList<Element> allImages = root.getElementsByTagName("IMG"); |
| 129 for (int i = 0; i < allImages.getLength(); i++) { | 131 for (int i = 0; i < allImages.getLength(); i++) { |
| 130 ImageElement image = ImageElement.as(allImages.getItem(i)); | 132 ImageElement image = ImageElement.as(allImages.getItem(i)); |
| 131 image.setSrc(image.getSrc()); | 133 image.setSrc(image.getSrc()); |
| 132 } | 134 } |
| 133 } | 135 } |
| 134 } | 136 } |
| OLD | NEW |