Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(167)

Side by Side Diff: third_party/pkg/html5lib/lib/dom_parsing.dart

Issue 22375011: move html5lib code into dart svn repo (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /**
2 * This library contains extra APIs that aren't in the DOM, but are useful
3 * when interacting with the parse tree.
4 */
5 library dom_parsing;
6
7 import 'dart:math';
8 import 'dart:utf' show codepointsToString;
9 import 'dom.dart';
10
/**
 * A basic visitor over DOM nodes.
 *
 * [visit] dispatches on the node's `nodeType` to one of the `visit*` methods.
 * Unless a subclass overrides them, all of those end up in
 * [visitNodeFallback], which simply visits the node's children.
 */
class TreeVisitor {
  /**
   * Dispatches [node] to the `visit*` method matching its node type and
   * returns whatever that method returns.
   *
   * Throws an [UnsupportedError] if the node type is none of the six types
   * handled here.
   */
  visit(Node node) {
    var type = node.nodeType;
    if (type == Node.ELEMENT_NODE) return visitElement(node);
    if (type == Node.TEXT_NODE) return visitText(node);
    if (type == Node.COMMENT_NODE) return visitComment(node);
    if (type == Node.DOCUMENT_FRAGMENT_NODE) {
      return visitDocumentFragment(node);
    }
    if (type == Node.DOCUMENT_NODE) return visitDocument(node);
    if (type == Node.DOCUMENT_TYPE_NODE) return visitDocumentType(node);
    throw new UnsupportedError('DOM node type ${node.nodeType}');
  }

  /** Calls [visit] on every child of [node]. */
  visitChildren(Node node) {
    // Iterate over a copy of the child list so that a visit method may mutate
    // the tree (for example remove the child) during the walk.
    var snapshot = node.nodes.toList();
    for (var child in snapshot) {
      visit(child);
    }
  }

  /**
   * The handler used when a more specific visit method has not been
   * overridden; it visits the children of [node].
   *
   * Only call this from a subclass of [TreeVisitor]. Other code should call
   * [visit] instead.
   */
  visitNodeFallback(Node node) => visitChildren(node);

  /** Visits a document node. Defaults to [visitNodeFallback]. */
  visitDocument(Document node) => visitNodeFallback(node);

  /** Visits a doctype node. Defaults to [visitNodeFallback]. */
  visitDocumentType(DocumentType node) => visitNodeFallback(node);

  /** Visits a text node. Defaults to [visitNodeFallback]. */
  visitText(Text node) => visitNodeFallback(node);

  // TODO(jmesserly): visit attributes.
  /** Visits an element node. Defaults to [visitNodeFallback]. */
  visitElement(Element node) => visitNodeFallback(node);

  /** Visits a comment node. Defaults to [visitNodeFallback]. */
  visitComment(Comment node) => visitNodeFallback(node);

  /**
   * Visits a document fragment. Defaults to [visitDocument], because
   * DocumentFragment is a Document.
   */
  visitDocumentFragment(DocumentFragment node) => visitDocument(node);
}
51
/**
 * Renders the DOM tree rooted at [node] as an HTML string that displays the
 * tree's HTML source, with code markup that lets CSS color the different
 * parts of the source.
 *
 * This runs a fresh [CodeMarkupVisitor] over [node] and returns the markup it
 * produced.
 */
String htmlToCodeMarkup(Node node) {
  var visitor = new CodeMarkupVisitor();
  visitor.visit(node);
  return visitor.toString();
}
60
/**
 * A [TreeVisitor] that renders a DOM tree as an HTML string displaying the
 * tree's HTML source. Each syntactic part (doctype, element name, attribute
 * name, attribute value, comment) is wrapped in a `<code>` element carrying a
 * CSS class, so the parts can be colored. See also [htmlToCodeMarkup].
 *
 * The markup accumulates in an internal buffer while nodes are visited; read
 * it back with [toString].
 */
class CodeMarkupVisitor extends TreeVisitor {
  final StringBuffer _str;

  CodeMarkupVisitor() : _str = new StringBuffer();

  /** Returns all markup generated so far. */
  String toString() => _str.toString();

  /** Wraps the markup generated for the document's children in `<pre>`. */
  visitDocument(Document node) {
    _str.write("<pre>");
    visitChildren(node);
    _str.write("</pre>");
  }

  /** Writes the doctype declaration as a single `doctype` code span. */
  visitDocumentType(DocumentType node) {
    // The name is emitted as text content, so escape it like text.
    var name = htmlSerializeEscape('${node.tagName}');
    _str.write('<code class="markup doctype">&lt;!DOCTYPE $name>'
        '</code>');
  }

  /** Writes the text node's `outerHtml` unchanged. */
  visitText(Text node) {
    // TODO(jmesserly): would be nice to use _addOuterHtml directly.
    // NOTE(review): this relies on outerHtml already being escaped for every
    // kind of parent element - confirm against dom.dart.
    _str.write(node.outerHtml);
  }

  /**
   * Writes the start tag of [node] with its attributes, then its children,
   * then its end tag.
   *
   * A void element (see [isVoidElement]) without children gets no end tag.
   */
  visitElement(Element node) {
    // Names come from the parsed document and end up as text content of the
    // generated markup, so they are escaped like any other text.
    var tag = htmlSerializeEscape('${node.tagName}');
    _str.write('&lt;<code class="markup element-name">$tag</code>');
    if (node.attributes.length > 0) {
      node.attributes.forEach((key, v) {
        var name = htmlSerializeEscape('$key');
        var value = _escapeAttributeValue(v);
        _str.write(' <code class="markup attribute-name">$name</code>'
            '=<code class="markup attribute-value">"$value"</code>');
      });
    }

    // Always close the start tag. Previously an empty non-void element such
    // as <div></div> was rendered without the ">" of its start tag.
    _str.write(">");
    if (node.nodes.length > 0) {
      visitChildren(node);
    } else if (isVoidElement(node.tagName)) {
      return;
    }
    _str.write('&lt;/<code class="markup element-name">$tag</code>>');
  }

  /** Writes the comment, with its data escaped, as a `comment` code span. */
  visitComment(Comment node) {
    var data = htmlSerializeEscape(node.data);
    _str.write('<code class="markup comment">&lt;!--${data}--></code>');
  }

  /**
   * Escapes an attribute value for display inside a `<code>` element.
   *
   * Attribute-mode escaping handles `&`, NO-BREAK SPACE and `"` but leaves
   * `<` and `>` alone. Because the value is written as element content rather
   * than inside a real attribute, those two are escaped here as well. This
   * cannot double-escape, since `&` has already become `&amp;`.
   */
  String _escapeAttributeValue(String value) {
    return htmlSerializeEscape(value, attributeMode: true)
        .replaceAll('<', '&lt;')
        .replaceAll('>', '&gt;');
  }
}
114
115
116 // TODO(jmesserly): reconcile this with dart:web htmlEscape.
117 // This one might be more useful, as it is HTML5 spec compliant.
/**
 * Escapes [text] as required by the
 * [HTML fragment serialization algorithm][1]. Following the
 * [specification][2], the replacements are:
 *
 * - every `&` character becomes `&amp;`;
 * - every U+00A0 NO-BREAK SPACE character becomes `&nbsp;`;
 * - in [attributeMode] only, every `"` character becomes `&quot;`;
 * - outside [attributeMode] only, every `<` character becomes `&lt;` and
 *   every `>` character becomes `&gt;`.
 *
 * If nothing needs escaping, [text] itself is returned.
 *
 * [1]: http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#serializing-html-fragments
 * [2]: http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#escapingString
 */
String htmlSerializeEscape(String text, {bool attributeMode: false}) {
  // Created lazily, on the first character that needs escaping, so that text
  // without special characters is returned without copying.
  StringBuffer escaped;
  // Index of the first character not yet copied into [escaped].
  int pending = 0;

  for (int index = 0; index < text.length; index++) {
    var unit = text[index];
    String entity;
    if (unit == '&') {
      entity = '&amp;';
    } else if (unit == '\u00A0' /*NO-BREAK SPACE*/) {
      entity = '&nbsp;';
    } else if (unit == '"') {
      if (attributeMode) entity = '&quot;';
    } else if (unit == '<') {
      if (!attributeMode) entity = '&lt;';
    } else if (unit == '>') {
      if (!attributeMode) entity = '&gt;';
    }
    if (entity == null) continue;

    if (escaped == null) escaped = new StringBuffer();
    // Copy the untouched run before this character, then its replacement.
    escaped.write(text.substring(pending, index));
    escaped.write(entity);
    pending = index + 1;
  }

  if (escaped == null) return text;
  escaped.write(text.substring(pending));
  return escaped.toString();
}
159
160
/**
 * Returns true if [tagName] is the name of a void element.
 *
 * A pretty printer needs this because void elements must not have an end tag.
 * The comparison is exact, so only the lower-case names listed below match.
 * See <http://dev.w3.org/html5/markup/syntax.html#void-elements> for more info.
 */
bool isVoidElement(String tagName) {
  const voidTagNames = const <String>[
    "area", "base", "br", "col", "command",
    "embed", "hr", "img", "input", "keygen",
    "link", "meta", "param", "source", "track",
    "wbr"
  ];
  return voidTagNames.contains(tagName);
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698