OLD | NEW |
| (Empty) |
1 /// This library contains extra APIs that aren't in the DOM, but are useful | |
2 /// when interacting with the parse tree. | |
3 library dom_parsing; | |
4 | |
5 import 'dom.dart'; | |
6 import 'src/constants.dart' show rcdataElements; | |
7 | |
/// A basic visitor that walks a tree of DOM nodes.
///
/// [visit] dispatches on [Node.nodeType] to the matching `visit*` method.
/// Every `visit*` method defaults to [visitNodeFallback], which recurses into
/// the node's children, so a subclass only needs to override the node kinds it
/// cares about.
class TreeVisitor {
  /// Routes [node] to the handler for its [Node.nodeType] and returns whatever
  /// that handler returns.
  ///
  /// Throws an [UnsupportedError] when the node type has no handler.
  visit(Node node) {
    final type = node.nodeType;
    if (type == Node.ELEMENT_NODE) return visitElement(node);
    if (type == Node.TEXT_NODE) return visitText(node);
    if (type == Node.COMMENT_NODE) return visitComment(node);
    if (type == Node.DOCUMENT_FRAGMENT_NODE) {
      return visitDocumentFragment(node);
    }
    if (type == Node.DOCUMENT_NODE) return visitDocument(node);
    if (type == Node.DOCUMENT_TYPE_NODE) return visitDocumentType(node);
    throw new UnsupportedError('DOM node type ${node.nodeType}');
  }

  /// Calls [visit] on each child of [node], in order.
  visitChildren(Node node) {
    // Walk a snapshot of the child list so that a handler can remove nodes
    // from the tree without disturbing the iteration.
    final snapshot = node.nodes.toList();
    snapshot.forEach(visit);
  }

  /// The handler used when a more specific `visit*` method has not been
  /// overridden; it visits the children of [node].
  ///
  /// Only call this from a subclass of [TreeVisitor]; other code should call
  /// [visit] instead.
  visitNodeFallback(Node node) {
    return visitChildren(node);
  }

  /// Handles a [Document]; defers to [visitNodeFallback] by default.
  visitDocument(Document node) => visitNodeFallback(node);

  /// Handles a [DocumentType]; defers to [visitNodeFallback] by default.
  visitDocumentType(DocumentType node) => visitNodeFallback(node);

  /// Handles a [Text] node; defers to [visitNodeFallback] by default.
  visitText(Text node) => visitNodeFallback(node);

  // TODO(jmesserly): visit attributes.
  /// Handles an [Element]; defers to [visitNodeFallback] by default.
  visitElement(Element node) => visitNodeFallback(node);

  /// Handles a [Comment]; defers to [visitNodeFallback] by default.
  visitComment(Comment node) => visitNodeFallback(node);

  /// Handles a [DocumentFragment]; defers to [visitNodeFallback] by default.
  visitDocumentFragment(DocumentFragment node) => visitNodeFallback(node);
}
52 | |
/// Renders the DOM tree rooted at [node] as an HTML string of code markup.
///
/// The result is meant for showing the HTML's source code, with the different
/// parts of the markup tagged so CSS can colour them. The work is done by a
/// fresh [CodeMarkupVisitor].
String htmlToCodeMarkup(Node node) {
  final visitor = new CodeMarkupVisitor();
  visitor.visit(node);
  return visitor.toString();
}
59 | |
/// Converts the DOM tree into an HTML string with code markup suitable for
/// displaying the HTML's source code with CSS colors for different parts of the
/// markup. See also [htmlToCodeMarkup].
///
/// Each syntactic part is wrapped in a `<code>` element whose class is
/// `markup` plus one of `doctype`, `element-name`, `attribute-name`,
/// `attribute-value` or `comment`. The angle brackets that belong to the
/// *displayed* source are written as `&lt;` / `&gt;` so that a browser shows
/// them as text instead of parsing them as tags.
class CodeMarkupVisitor extends TreeVisitor {
  /// Accumulates the generated markup.
  final StringBuffer _str;

  CodeMarkupVisitor() : _str = new StringBuffer();

  /// Returns the markup generated by the visits performed so far.
  @override
  String toString() => _str.toString();

  /// Wraps the rendering of the document's children in a `<pre>` element.
  @override
  visitDocument(Document node) {
    _str.write('<pre>');
    visitChildren(node);
    _str.write('</pre>');
  }

  /// Writes the doctype declaration as displayed text.
  @override
  visitDocumentType(DocumentType node) {
    _str.write('<code class="markup doctype">&lt;!DOCTYPE ${node.name}&gt;'
        '</code>');
  }

  /// Writes the text via [writeTextNodeAsHtml].
  @override
  visitText(Text node) {
    writeTextNodeAsHtml(_str, node);
  }

  /// Writes the start tag, attributes, children and end tag of [node].
  ///
  /// A void element (see [isVoidElement]) without children gets no end tag.
  @override
  visitElement(Element node) {
    final tag = node.localName;
    _str.write('&lt;<code class="markup element-name">$tag</code>');
    node.attributes.forEach((key, value) {
      final escaped = htmlSerializeEscape(value, attributeMode: true);
      _str.write(' <code class="markup attribute-name">$key</code>'
          '=<code class="markup attribute-value">"$escaped"</code>');
    });

    // Always terminate the start tag. Previously an empty non-void element
    // skipped this and was rendered as `<div</div>`.
    _str.write('&gt;');
    if (node.nodes.isEmpty && isVoidElement(tag)) return;

    visitChildren(node);
    _str.write('&lt;/<code class="markup element-name">$tag</code>&gt;');
  }

  /// Writes the comment, with its data escaped, as displayed text.
  @override
  visitComment(Comment node) {
    final data = htmlSerializeEscape(node.data);
    _str.write('<code class="markup comment">&lt;!--$data--&gt;</code>');
  }
}
110 | |
// TODO(jmesserly): reconcile this with dart:web htmlEscape.
// This one might be more useful, as it is HTML5 spec compliant.
/// Escapes [text] for use in the
/// [HTML fragment serialization algorithm][1]. In particular, as described
/// in the [specification][2]:
///
/// - Replace any occurrence of the `&` character by the string `&amp;`.
/// - Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
///   string `&nbsp;`.
/// - If the algorithm was invoked in [attributeMode], replace any occurrences
///   of the `"` character by the string `&quot;`.
/// - If the algorithm was not invoked in [attributeMode], replace any
///   occurrences of the `<` character by the string `&lt;`, and any occurrences
///   of the `>` character by the string `&gt;`.
///
/// Returns [text] itself when it contains nothing that needs escaping.
///
/// [1]: http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#serializing-html-fragments
/// [2]: http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#escapingString
String htmlSerializeEscape(String text, {bool attributeMode = false}) {
  final result = new StringBuffer();
  // Index of the first character not yet copied into [result]. It stays 0
  // until the first replacement, which lets us detect "nothing to escape".
  var pendingStart = 0;

  for (var i = 0; i < text.length; i++) {
    // The empty string means "leave this character alone".
    var replacement = '';
    switch (text[i]) {
      case '&':
        replacement = '&amp;';
        break;
      case '\u00A0' /*NO-BREAK SPACE*/ :
        replacement = '&nbsp;';
        break;
      case '"':
        if (attributeMode) replacement = '&quot;';
        break;
      case '<':
        if (!attributeMode) replacement = '&lt;';
        break;
      case '>':
        if (!attributeMode) replacement = '&gt;';
        break;
    }
    if (replacement.isEmpty) continue;

    // Flush the untouched run before this character, then the entity.
    result.write(text.substring(pendingStart, i));
    result.write(replacement);
    pendingStart = i + 1;
  }

  // Every replacement sets pendingStart to at least 1, so 0 means untouched.
  if (pendingStart == 0) return text;

  result.write(text.substring(pendingStart));
  return result.toString();
}
162 | |
/// Whether [tagName] names a void element.
///
/// A pretty printer needs to know this because a void element must not be
/// written with an end tag. The name is compared exactly (case-sensitively)
/// against the known void element names.
/// See also: <http://dev.w3.org/html5/markup/syntax.html#void-elements>.
bool isVoidElement(String tagName) {
  return const <String>[
    'area',
    'base',
    'br',
    'col',
    'command',
    'embed',
    'hr',
    'img',
    'input',
    'keygen',
    'link',
    'meta',
    'param',
    'source',
    'track',
    'wbr',
  ].contains(tagName);
}
189 | |
/// Appends the serialized form of the text [node] to [str], following:
/// <http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#html-fragment-serialization-algorithm>
///
/// The text is written verbatim when the node's parent is an element listed
/// in `rcdataElements` or is a `plaintext` element; otherwise it is passed
/// through [htmlSerializeEscape] first.
void writeTextNodeAsHtml(StringBuffer str, Text node) {
  final container = node.parentNode;

  // Text inside certain elements, notably <script>, must not be escaped.
  var writeVerbatim = false;
  if (container is Element) {
    final name = container.localName;
    writeVerbatim = rcdataElements.contains(name) || name == 'plaintext';
  }

  str.write(writeVerbatim ? node.data : htmlSerializeEscape(node.data));
}
OLD | NEW |