Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(525)

Side by Side Diff: src/com/dom_distiller/client/DomToSaxParser.java

Issue 275493007: filter out invisible elements (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: addressed comments Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 package com.dom_distiller.client;
6
7 import com.google.gwt.core.client.JsArray;
8 import com.google.gwt.dom.client.Element;
9 import com.google.gwt.dom.client.Node;
10 import com.google.gwt.dom.client.NodeList;
11 import com.google.gwt.dom.client.Text;
12
13 import org.xml.sax.Attributes;
14 import org.xml.sax.AttributesImpl;
15 import org.xml.sax.ContentHandler;
16 import org.xml.sax.SAXException;
17
18 import java.util.ArrayList;
19 import java.util.List;
20 import java.util.logging.Logger;
21
22 /**
23 * Used to generate sax events from the DOM tree.
24 */
25 public class DomToSaxParser {
26 static Logger logger = Logger.getLogger("DomToSaxParser");
27
28 private static class DomToSaxVisitor implements DomWalker.Visitor {
29 private static final String sHtmlNamespace = "http://www.w3.org/1999/xht ml";
30 private final ContentHandler handler;
31 private List<Node> textNodes;
32
33 DomToSaxVisitor(ContentHandler h) {
34 handler = h;
35 textNodes = new ArrayList<Node>();
36 }
37
38 @Override
39 public boolean visit(Node n) {
40 try {
41 switch (n.getNodeType()) {
42 case Node.TEXT_NODE:
43 textNodes.add(n);
44 String text = Text.as(n).getData();
45 handler.characters(text.toCharArray(), 0, text.length()) ;
46 return false;
47 case Node.ELEMENT_NODE:
48 Element e = Element.as(n);
49 Attributes attrs = getSaxAttributes(e);
50 handler.startElement(sHtmlNamespace, e.getTagName(), e.g etTagName(), attrs);
51 return true;
52 case Node.DOCUMENT_NODE: // Don't recurse into sub-document s.
53 default: // This case is for comment nodes.
54 return false;
55 }
56 } catch (SAXException e) {
57 return false;
58 }
59 }
60
61 @Override
62 public void exit(Node n) {
63 Element e = Element.as(n);
64 try {
65 handler.endElement(sHtmlNamespace, e.getTagName(), e.getTagName( ));
66 } catch (SAXException ex) {
67 // Intentionally ignored.
68 }
69 }
70 }
71
72 /**
73 * This will generate sax events for the DOM tree rooted at e to the provide d ContentHandler.
74 *
75 * @Return A list of the text nodes (in order).
76 */
77 static List<Node> parse(Element e, ContentHandler handler) {
78 DomToSaxVisitor visitor = new DomToSaxVisitor(handler);
79 new DomWalker(visitor).walk(e);
80 return visitor.textNodes;
81 }
82
83 /**
84 * @Return The element's attribute list converted to org.xml.sax.Attributes.
85 */
86 public static Attributes getSaxAttributes(Element e) {
87 AttributesImpl attrs = new AttributesImpl();
88
89 JsArray<Node> jsAttrs = DomUtil.getAttributes(e);
90 for (int i = 0; i < jsAttrs.length(); ++i) {
91 final Node jsAttr = jsAttrs.get(i);
92 attrs.addAttribute("", jsAttr.getNodeName(), jsAttr.getNodeName(), " CDATA", jsAttr.getNodeValue());
93 }
94
95 return attrs;
96 }
97 }
OLDNEW
« no previous file with comments | « src/com/dom_distiller/client/ContentExtractor.java ('k') | src/com/dom_distiller/client/DomToSaxVisitor.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698