OLD | NEW |
| (Empty) |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 part of dart.convert; | |
6 | |
/**
 * A `String` converter that converts characters to HTML entities.
 *
 * This is intended to sanitize text before inserting the text into an HTML
 * document. Characters that are meaningful in HTML are converted to
 * HTML entities (like `&amp;` for `&`).
 *
 * The general converter escapes all characters that are meaningful in HTML
 * attributes or normal element context. Elements with special content types
 * (like CSS or JavaScript) may need a more specialized escaping that
 * understands that content type.
 *
 * If the context where the text will be inserted is known in more detail,
 * it's possible to omit escaping some characters (like quotes when not
 * inside an attribute value).
 *
 * The escaped text should only be used inside quoted HTML attribute values
 * or as text content of a normal element. Using the escaped text inside a
 * tag, but not inside a quoted attribute value, is still dangerous.
 */
const HtmlEscape HTML_ESCAPE = const HtmlEscape();
28 | |
/**
 * HTML escape modes.
 *
 * Allows specifying a mode for HTML escaping that depends on the context
 * where the escaped result is going to be used.
 * The relevant contexts are:
 *
 * * as text content of an HTML element.
 * * as value of a (single- or double-) quoted attribute value.
 *
 * All modes require escaping of `&` (ampersand) characters, and may
 * enable escaping of more characters.
 *
 * Custom escape modes can be created using the [HtmlEscapeMode.HtmlEscapeMode]
 * constructor.
 */
class HtmlEscapeMode {
  /** Human-readable name of the mode, returned by [toString]. */
  final String _name;

  /** Whether to escape '<' and '>'. */
  final bool escapeLtGt;

  /** Whether to escape '"' (quote). */
  final bool escapeQuot;

  /** Whether to escape "'" (apostrophe). */
  final bool escapeApos;

  /**
   * Whether to escape "/" (forward slash, solidus).
   *
   * Escaping a slash is recommended to avoid cross-site scripting attacks by
   * [the Open Web Application Security Project](https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content).
   */
  final bool escapeSlash;

  /**
   * Default escaping mode which escapes all characters.
   *
   * The result of such an escaping is usable both in element content and
   * in any attribute value.
   *
   * The escaping only works for elements with normal HTML content,
   * and not for, for example, script or style element content,
   * which require escapes matching their particular content syntax.
   */
  static const HtmlEscapeMode UNKNOWN =
      const HtmlEscapeMode._('unknown', true, true, true, true);

  /**
   * Escaping mode for text going into double-quoted HTML attribute values.
   *
   * The result should not be used as the content of an unquoted
   * or single-quoted attribute value.
   *
   * Escapes double quotes (`"`) but not single quotes (`'`),
   * and escapes `<` and `>` characters because they are not allowed
   * in strict XHTML attributes.
   */
  static const HtmlEscapeMode ATTRIBUTE =
      const HtmlEscapeMode._('attribute', true, true, false, false);

  /**
   * Escaping mode for text going into single-quoted HTML attribute values.
   *
   * The result should not be used as the content of an unquoted
   * or double-quoted attribute value.
   *
   * Escapes single quotes (`'`) but not double quotes (`"`),
   * and escapes `<` and `>` characters because they are not allowed
   * in strict XHTML attributes.
   */
  // Uses its own name so that toString() can tell this mode apart from
  // [ATTRIBUTE]; both previously reported 'attribute'.
  static const HtmlEscapeMode SQ_ATTRIBUTE =
      const HtmlEscapeMode._('sq_attribute', true, false, true, false);

  /**
   * Escaping mode for text going into HTML element content.
   *
   * The escaping only works for elements with normal HTML content,
   * and not for, for example, script or style element content,
   * which require escapes matching their particular content syntax.
   *
   * Escapes `<` and `>` characters.
   */
  static const HtmlEscapeMode ELEMENT =
      const HtmlEscapeMode._('element', true, false, false, false);

  /** Private constructor used for the predefined modes above. */
  const HtmlEscapeMode._(this._name,
                         this.escapeLtGt,
                         this.escapeQuot,
                         this.escapeApos,
                         this.escapeSlash);

  /**
   * Create a custom escaping mode.
   *
   * All modes escape `&`.
   * The mode can further be set to escape `<` and `>` ([escapeLtGt]),
   * `"` ([escapeQuot]), `'` ([escapeApos]), and/or `/` ([escapeSlash]).
   *
   * The optional [name] is what [toString] returns; it defaults to
   * `"custom"`.
   */
  const HtmlEscapeMode({String name: "custom",
                        this.escapeLtGt: false,
                        this.escapeQuot: false,
                        this.escapeApos: false,
                        this.escapeSlash: false}) : _name = name;

  /** Returns the name of this mode. */
  String toString() => _name;
}
133 | |
/**
 * Converter which escapes characters with special meaning in HTML.
 *
 * The converter finds characters that are significant in HTML source and
 * replaces them with corresponding HTML entities.
 *
 * The characters that need escaping in HTML are:
 *
 * * `&` (ampersand) always needs to be escaped.
 * * `<` (less than) and `>` (greater than) when inside an element.
 * * `"` (quote) when inside a double-quoted attribute value.
 * * `'` (apostrophe) when inside a single-quoted attribute value.
 *       Apostrophe is escaped as `&#39;` instead of `&apos;` since
 *       not all browsers understand `&apos;`.
 * * `/` (slash) is recommended to be escaped because it may be used
 *       to terminate an element in some HTML dialects.
 *
 * Escaping `>` (greater than) isn't necessary, but the result is often
 * found to be easier to read if greater-than is also escaped whenever
 * less-than is.
 */
class HtmlEscape extends Converter<String, String> {

  /** The [HtmlEscapeMode] used by the converter. */
  final HtmlEscapeMode mode;

  /**
   * Create converter that escapes HTML characters.
   *
   * If [mode] is provided as either [HtmlEscapeMode.ATTRIBUTE] or
   * [HtmlEscapeMode.ELEMENT], only the corresponding subset of HTML
   * characters are escaped.
   * The default is to escape all HTML characters.
   */
  const HtmlEscape([this.mode = HtmlEscapeMode.UNKNOWN]);

  /**
   * Escapes [text] according to [mode].
   *
   * Returns [text] itself when it contains no character that needs escaping.
   */
  String convert(String text) {
    var val = _convert(text, 0, text.length);
    return val == null ? text : val;
  }

  /**
   * Converts the substring of text from start to end.
   *
   * Returns `null` if no changes were necessary, otherwise returns
   * the converted string.
   */
  String _convert(String text, int start, int end) {
    // Allocated lazily so that input without escapable characters costs
    // no buffer and can be reported as "unchanged" by returning null.
    StringBuffer result = null;
    for (int i = start; i < end; i++) {
      var ch = text[i];
      String replacement = null;
      switch (ch) {
        case '&': replacement = '&amp;'; break;
        case '"': if (mode.escapeQuot) replacement = '&quot;'; break;
        case "'": if (mode.escapeApos) replacement = '&#39;'; break;
        case '<': if (mode.escapeLtGt) replacement = '&lt;'; break;
        case '>': if (mode.escapeLtGt) replacement = '&gt;'; break;
        case '/': if (mode.escapeSlash) replacement = '&#47;'; break;
      }
      if (replacement != null) {
        if (result == null) result = new StringBuffer();
        // Copy the unescaped run preceding this character, then the entity.
        if (i > start) result.write(text.substring(start, i));
        result.write(replacement);
        // `start` now marks the beginning of the next pending run.
        start = i + 1;
      }
    }
    if (result == null) return null;
    // Flush whatever follows the last escaped character.
    if (end > start) result.write(text.substring(start, end));
    return result.toString();
  }

  /**
   * Starts a chunked conversion that writes escaped text to [sink].
   *
   * A [sink] that is not already a [StringConversionSink] is wrapped with
   * [StringConversionSink.from] first.
   */
  StringConversionSink startChunkedConversion(Sink<String> sink) {
    if (sink is! StringConversionSink) {
      sink = new StringConversionSink.from(sink);
    }
    return new _HtmlEscapeSink(this, sink);
  }
}
213 | |
/**
 * Sink used for chunked HTML escaping.
 *
 * Each slice it receives is run through the owning [HtmlEscape] converter
 * and the result is passed on to the wrapped [StringConversionSink].
 */
class _HtmlEscapeSink extends StringConversionSinkBase {
  /** Converter whose `_convert` performs the escaping. */
  final HtmlEscape _escape;

  /** Destination that receives the escaped output. */
  final StringConversionSink _sink;

  _HtmlEscapeSink(this._escape, this._sink);

  /**
   * Escapes the part of [chunk] from [start] to [end] and forwards it.
   *
   * When [isLast] is true the wrapped sink is closed after the data has
   * been passed on.
   */
  void addSlice(String chunk, int start, int end, bool isLast) {
    var val = _escape._convert(chunk, start, end);
    if (val == null) {
      // Nothing needed escaping: hand the original slice through unchanged.
      _sink.addSlice(chunk, start, end, isLast);
    } else {
      _sink.add(val);
      if (isLast) _sink.close();
    }
  }

  /** Closes the wrapped sink. */
  void close() { _sink.close(); }
}
OLD | NEW |