OLD | NEW |
(Empty) | |
| 1 /// This library contains extra APIs that aren't in the DOM, but are useful |
| 2 /// when interacting with the parse tree. |
| 3 library dom_parsing; |
| 4 |
| 5 import 'dom.dart'; |
| 6 import 'src/constants.dart' show rcdataElements; |
| 7 |
/// A basic visitor that walks DOM nodes, dispatching on each node's type.
///
/// Every `visit*` handler defaults to [visitNodeFallback]; subclasses override
/// the handlers they are interested in.
class TreeVisitor {
  /// Routes [node] to the handler matching its `nodeType` and returns that
  /// handler's result.
  ///
  /// Throws an [UnsupportedError] when the node type has no handler.
  visit(Node node) {
    final type = node.nodeType;
    if (type == Node.ELEMENT_NODE) return visitElement(node);
    if (type == Node.TEXT_NODE) return visitText(node);
    if (type == Node.COMMENT_NODE) return visitComment(node);
    if (type == Node.DOCUMENT_FRAGMENT_NODE) {
      return visitDocumentFragment(node);
    }
    if (type == Node.DOCUMENT_NODE) return visitDocument(node);
    if (type == Node.DOCUMENT_TYPE_NODE) return visitDocumentType(node);
    throw new UnsupportedError('DOM node type ${node.nodeType}');
  }

  /// Visits every child of [node] in order.
  visitChildren(Node node) {
    // Walk a snapshot of the child list so a handler may remove nodes from
    // the tree while the traversal is still running.
    node.nodes.toList().forEach(visit);
  }

  /// The handler used by every `visit*` method that a subclass has not
  /// overridden; it recurses into the children of [node].
  ///
  /// Only call this from a subclass of [TreeVisitor]; everyone else should
  /// call [visit] instead.
  visitNodeFallback(Node node) {
    return visitChildren(node);
  }

  /// Handles a [Document] node.
  visitDocument(Document node) {
    return visitNodeFallback(node);
  }

  /// Handles a [DocumentType] node.
  visitDocumentType(DocumentType node) {
    return visitNodeFallback(node);
  }

  /// Handles a [Text] node.
  visitText(Text node) {
    return visitNodeFallback(node);
  }

  // TODO(jmesserly): visit attributes.
  /// Handles an [Element] node.
  visitElement(Element node) {
    return visitNodeFallback(node);
  }

  /// Handles a [Comment] node.
  visitComment(Comment node) {
    return visitNodeFallback(node);
  }

  /// Handles a [DocumentFragment] node.
  visitDocumentFragment(DocumentFragment node) {
    return visitNodeFallback(node);
  }
}
| 52 |
/// Renders the DOM tree rooted at [node] as an HTML string containing code
/// markup, so the HTML's source can be displayed with CSS colors for the
/// different parts of the markup.
///
/// This is a convenience wrapper that runs a fresh [CodeMarkupVisitor] over
/// [node] and returns its string form.
String htmlToCodeMarkup(Node node) {
  final visitor = new CodeMarkupVisitor();
  visitor.visit(node);
  return visitor.toString();
}
| 59 |
/// Converts the DOM tree into an HTML string with code markup suitable for
/// displaying the HTML's source code with CSS colors for different parts of the
/// markup. See also [htmlToCodeMarkup].
///
/// The markup accumulates in an internal buffer while nodes are visited; read
/// it back with [toString].
class CodeMarkupVisitor extends TreeVisitor {
  final StringBuffer _str;

  CodeMarkupVisitor() : _str = new StringBuffer();

  /// Returns the markup generated so far.
  @override
  String toString() => _str.toString();

  /// Wraps the rendering of the document's children in a `<pre>` element.
  @override
  visitDocument(Document node) {
    _str.write("<pre>");
    visitChildren(node);
    _str.write("</pre>");
  }

  /// Renders the doctype declaration inside a `doctype` code span.
  @override
  visitDocumentType(DocumentType node) {
    // The opening `<` is emitted as `&lt;` so the declaration is displayed as
    // text rather than being interpreted as markup.
    _str.write('<code class="markup doctype">&lt;!DOCTYPE ${node.name}>'
        '</code>');
  }

  /// Renders a text node via [writeTextNodeAsHtml].
  @override
  visitText(Text node) {
    writeTextNodeAsHtml(_str, node);
  }

  /// Renders an element: its start tag with attributes, its children, and,
  /// unless it is an empty void element, its end tag.
  @override
  visitElement(Element node) {
    final tag = node.localName;
    // `&lt;` keeps the displayed angle bracket from opening a real tag.
    _str.write('&lt;<code class="markup element-name">$tag</code>');
    node.attributes.forEach((key, v) {
      // NOTE(review): attribute mode leaves `<` and `>` untouched, yet the
      // value is emitted as element text here. Confirm this is intended.
      v = htmlSerializeEscape(v, attributeMode: true);
      _str.write(' <code class="markup attribute-name">$key</code>'
          '=<code class="markup attribute-value">"$v"</code>');
    });

    // Always close the start tag. Previously an element that had no children
    // and was not void never received its `>`, yielding `<div</div>`.
    _str.write(">");
    if (node.nodes.length > 0) {
      visitChildren(node);
    } else if (isVoidElement(tag)) {
      // Void elements must not have an end tag.
      return;
    }
    _str.write('&lt;/<code class="markup element-name">$tag</code>>');
  }

  /// Renders a comment inside a `comment` code span.
  @override
  visitComment(Comment node) {
    var data = htmlSerializeEscape(node.data);
    // `&lt;!--` shows the comment delimiters instead of starting a real HTML
    // comment that would hide the content.
    _str.write('<code class="markup comment">&lt;!--${data}--></code>');
  }
}
| 110 |
// TODO(jmesserly): reconcile this with dart:web htmlEscape.
// This one might be more useful, as it is HTML5 spec compliant.
/// Escapes [text] for use in the
/// [HTML fragment serialization algorithm][1]. In particular, as described
/// in the [specification][2]:
///
/// - Replace any occurrence of the `&` character by the string `&amp;`.
/// - Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
///   string `&nbsp;`.
/// - If the algorithm was invoked in [attributeMode], replace any occurrences
///   of the `"` character by the string `&quot;`.
/// - If the algorithm was not invoked in [attributeMode], replace any
///   occurrences of the `<` character by the string `&lt;`, and any occurrences
///   of the `>` character by the string `&gt;`.
///
/// Returns [text] itself, without copying, when nothing needs to be escaped.
///
/// [1]: http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#serializing-html-fragments
/// [2]: http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#escapingString
String htmlSerializeEscape(String text, {bool attributeMode = false}) {
  final result = new StringBuffer();
  // Index of the first character not yet copied into [result]. It stays at 0
  // until the first replacement, which doubles as the "nothing escaped" flag.
  var start = 0;
  for (var i = 0; i < text.length; i++) {
    // The empty string means "this character needs no escaping".
    var replace = '';
    switch (text[i]) {
      case '&':
        replace = '&amp;';
        break;
      case '\u00A0' /*NO-BREAK SPACE*/ :
        replace = '&nbsp;';
        break;
      case '"':
        if (attributeMode) replace = '&quot;';
        break;
      case '<':
        if (!attributeMode) replace = '&lt;';
        break;
      case '>':
        if (!attributeMode) replace = '&gt;';
        break;
    }
    if (replace.isEmpty) continue;

    // Flush the untouched run preceding this character, then the entity.
    result
      ..write(text.substring(start, i))
      ..write(replace);
    start = i + 1;
  }

  if (start == 0) return text;
  result.write(text.substring(start));
  return result.toString();
}
| 162 |
/// Whether [tagName] names a void element.
///
/// A pretty printer needs this because void elements must not have an end
/// tag. The comparison is exact, so only the names listed below match.
/// See also: <http://dev.w3.org/html5/markup/syntax.html#void-elements>.
bool isVoidElement(String tagName) {
  const voidTags = const <String>[
    "area",
    "base",
    "br",
    "col",
    "command",
    "embed",
    "hr",
    "img",
    "input",
    "keygen",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
  ];
  return voidTags.contains(tagName);
}
| 189 |
/// Writes the text node [node] to [str], serialized according to:
/// <http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#html-fragment-serialization-algorithm>
///
/// The node's data is written verbatim when its parent is an element listed
/// in `rcdataElements` or is a `plaintext` element; otherwise it is passed
/// through [htmlSerializeEscape] first.
void writeTextNodeAsHtml(StringBuffer str, Text node) {
  // Text inside certain elements, notably <script>, must not be escaped.
  var verbatim = false;
  final owner = node.parentNode;
  if (owner is Element) {
    final name = owner.localName;
    verbatim = rcdataElements.contains(name) || name == 'plaintext';
  }
  str.write(verbatim ? node.data : htmlSerializeEscape(node.data));
}
OLD | NEW |