OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.convert; | 5 part of dart.convert; |
6 | 6 |
7 // TODO(floitsch) - Document - Issue 13097 | 7 /** |
| 8 * A `String` converter that converts characters to HTML entities. |
| 9 * |
| 10 * This is intended to sanitize text before inserting the text into an HTML |
| 11 * document. Characters that are meaningful in HTML are converted to |
| 12 * HTML entities (like `&amp;` for `&`). |
| 13 * |
| 14 * The general converter escapes all characters that are meaningful in HTML |
| 15 * attributes or normal element context. Elements with special content types |
| 16 * (like CSS or JavaScript) may need a more specialized escaping that |
| 17 * understands that content type. |
| 18 * |
| 19 * If the context where the text will be inserted is known in more detail, |
| 20 * it's possible to omit escaping some characters (like quotes when not |
| 21 * inside an attribute value). |
| 22 * |
| 23 * The escaped text should only be used inside quoted HTML attributes values |
| 24 * or as text content of a normal element. Using the escaped text inside a |
| 25 * tag, but not inside a quoted attribute value, is still dangerous. |
| 26 */ |
8 const HtmlEscape HTML_ESCAPE = const HtmlEscape(); | 27 const HtmlEscape HTML_ESCAPE = const HtmlEscape(); |
9 | 28 |
| 29 /** |
| 30 * HTML escape modes. |
| 31 * |
| 32 * Allows specifying a mode for HTML escaping that depends on the context |
| 33 * where the escaped result is going to be used. |
| 34 * The relevant contexts are: |
| 35 * |
| 36 * * as text content of an HTML element. |
| 37 * * as the value of a (single- or double-) quoted attribute. |
| 38 * |
| 39 * All modes require escaping of `&` (ampersand) characters, and may |
| 40 * enable escaping of more characters. |
| 41 * |
| 42 * Custom escape modes can be created using the [HtmlEscapeMode.HtmlEscapeMode] |
| 43 * constructor. |
| 44 */ |
10 class HtmlEscapeMode { | 45 class HtmlEscapeMode { |
11 final String _name; | 46 final String _name; |
| 47 /** Whether to escape '<' and '>'. */ |
12 final bool escapeLtGt; | 48 final bool escapeLtGt; |
| 49 /** Whether to escape '"' (quote). */ |
13 final bool escapeQuot; | 50 final bool escapeQuot; |
| 51 /** Whether to escape "'" (apostrophe). */ |
14 final bool escapeApos; | 52 final bool escapeApos; |
| 53 /** |
| 54 * Whether to escape "/" (forward slash, solidus). |
| 55 * |
| 56 * Escaping a slash is recommended to avoid cross-site scripting attacks by |
| 57 * [the Open Web Application Security Project](https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content) |
| 58 */ |
15 final bool escapeSlash; | 59 final bool escapeSlash; |
16 | 60 |
17 // TODO(floitsch) - Document - Issue 13097 | 61 /** |
| 62 * Default escaping mode which escapes all characters. |
| 63 * |
| 64 * The result of such an escaping is usable both in element content and |
| 65 * in any attribute value. |
| 66 * |
| 67 * The escaping only works for elements with normal HTML content, |
| 68 * and not for, for example, script or style element content, |
| 69 * which require escapes matching their particular content syntax. |
| 70 */ |
18 static const HtmlEscapeMode UNKNOWN = | 71 static const HtmlEscapeMode UNKNOWN = |
19 const HtmlEscapeMode._('unknown', true, true, true, true); | 72 const HtmlEscapeMode._('unknown', true, true, true, true); |
20 | 73 |
21 // TODO(floitsch) - Document - Issue 13097 | 74 /** |
| 75 * Escaping mode for text going into double-quoted HTML attribute values. |
| 76 * |
| 77 * The result should not be used as the content of an unquoted |
| 78 * or single-quoted attribute value. |
| 79 * |
| 80 * Escapes double quotes (`"`) but not single quotes (`'`), |
| 81 * and escapes `<` and `>` characters because they are not allowed |
| 82 * in strict XHTML attributes. |
| 83 */ |
22 static const HtmlEscapeMode ATTRIBUTE = | 84 static const HtmlEscapeMode ATTRIBUTE = |
23 const HtmlEscapeMode._('attribute', false, true, false, false); | 85 const HtmlEscapeMode._('attribute', true, true, false, false); |
24 | 86 |
25 // TODO(floitsch) - Document - Issue 13097 | 87 /** |
| 88 * Escaping mode for text going into single-quoted HTML attribute values. |
| 89 * |
| 90 * The result should not be used as the content of an unquoted |
| 91 * or double-quoted attribute value. |
| 92 * |
| 93 * Escapes single quotes (`'`) but not double quotes (`"`), |
| 94 * and escapes `<` and `>` characters because they are not allowed |
| 95 * in strict XHTML attributes. |
| 96 */ |
| 97 static const HtmlEscapeMode SQ_ATTRIBUTE = |
| 98 const HtmlEscapeMode._('attribute', true, false, true, false); |
| 99 |
| 100 /** |
| 101 * Escaping mode for text going into HTML element content. |
| 102 * |
| 103 * The escaping only works for elements with normal HTML content, |
| 104 * and not for, for example, script or style element content, |
| 105 * which require escapes matching their particular content syntax. |
| 106 * |
| 107 * Escapes `<` and `>` characters. |
| 108 */ |
26 static const HtmlEscapeMode ELEMENT = | 109 static const HtmlEscapeMode ELEMENT = |
27 const HtmlEscapeMode._('element', true, false, false, true); | 110 const HtmlEscapeMode._('element', true, false, false, false); |
28 | 111 |
29 // TODO(floitsch) - Document - Issue 13097 | 112 const HtmlEscapeMode._(this._name, |
30 const HtmlEscapeMode._(this._name, this.escapeLtGt, this.escapeQuot, | 113 this.escapeLtGt, |
31 this.escapeApos, this.escapeSlash); | 114 this.escapeQuot, |
| 115 this.escapeApos, |
| 116 this.escapeSlash); |
| 117 |
| 118 /** |
| 119 * Create a custom escaping mode. |
| 120 * |
| 121 * All modes escape `&`. |
| 122 * The mode can further be set to escape `<` and `>` ([escapeLtGt]), |
| 123 * `"` ([escapeQuot]), `'` ([escapeApos]), and/or `/` ([escapeSlash]). |
| 124 */ |
| 125 const HtmlEscapeMode({String name: "custom", |
| 126 this.escapeLtGt: false, |
| 127 this.escapeQuot: false, |
| 128 this.escapeApos: false, |
| 129 this.escapeSlash: false}) : _name = name; |
32 | 130 |
33 String toString() => _name; | 131 String toString() => _name; |
34 } | 132 } |
35 | 133 |
36 // TODO(floitsch) - Document - Issue 13097 | 134 /** |
37 class HtmlEscape extends Converter<String, String> { | 135 * Converter which escapes characters with special meaning in HTML. |
| 136 * |
| 137 * The converter finds characters that are significant in HTML source and |
| 138 * replaces them with corresponding HTML entities. |
| 139 * |
| 140 * The characters that need escaping in HTML are: |
| 141 * |
| 142 * * `&` (ampersand) always needs to be escaped. |
| 143 * * `<` (less than) and `>` (greater than) when inside an element. |
| 144 * * `"` (quote) when inside a double-quoted attribute value. |
| 145 * * `'` (apostrophe) when inside a single-quoted attribute value. |
| 146 * Apostrophe is escaped as `&#39;` instead of `&apos;` since |
| 147 * not all browsers understand `&apos;`. |
| 148 * * `/` (slash) is recommended to be escaped because it may be used |
| 149 * to terminate an element in some HTML dialects. |
| 150 * |
| 151 * Escaping `>` (greater than) isn't necessary, but the result is often |
| 152 * found to be easier to read if greater-than is also escaped whenever |
| 153 * less-than is. |
| 154 */ |
| 155 class HtmlEscape extends ChunkedConverter<String, String, String, String> { |
38 | 156 |
39 // TODO(floitsch) - Document - Issue 13097 | 157 /** The [HtmlEscapeMode] used by the converter. */ |
40 final HtmlEscapeMode mode; | 158 final HtmlEscapeMode mode; |
41 | 159 |
42 // TODO(floitsch) - Document - Issue 13097 | 160 /** |
| 161 * Create a converter that escapes HTML characters. |
| 162 * |
| 163 * If [mode] is provided as either [HtmlEscapeMode.ATTRIBUTE] or |
| 164 * [HtmlEscapeMode.ELEMENT], only the corresponding subset of HTML |
| 165 * characters is escaped. |
| 166 * The default is to escape all HTML characters. |
| 167 */ |
43 const HtmlEscape([this.mode = HtmlEscapeMode.UNKNOWN]); | 168 const HtmlEscape([this.mode = HtmlEscapeMode.UNKNOWN]); |
44 | 169 |
45 String convert(String text) { | 170 String convert(String text) { |
46 var val = _convert(text, 0, text.length); | 171 var val = _convert(text, 0, text.length); |
47 return val == null ? text : val; | 172 return val == null ? text : val; |
48 } | 173 } |
49 | 174 |
| 175 /** |
| 176 * Converts the substring of text from start to end. |
| 177 * |
| 178 * Returns `null` if no changes were necessary, otherwise returns |
| 179 * the converted string. |
| 180 */ |
50 String _convert(String text, int start, int end) { | 181 String _convert(String text, int start, int end) { |
51 StringBuffer result = null; | 182 StringBuffer result = null; |
52 for (int i = start; i < end; i++) { | 183 for (int i = start; i < end; i++) { |
53 var ch = text[i]; | 184 var ch = text[i]; |
54 String replace = null; | 185 String replacement = null; |
55 switch (ch) { | 186 switch (ch) { |
56 case '&': replace = '&'; break; | 187 case '&': replacement = '&'; break; |
57 case '\u00A0'/*NO-BREAK SPACE*/: replace = ' '; break; | 188 case '"': if (mode.escapeQuot) replacement = '"'; break; |
58 case '"': if (mode.escapeQuot) replace = '"'; break; | 189 case "'": if (mode.escapeApos) replacement = '''; break; |
59 case "'": if (mode.escapeApos) replace = '''; break; | 190 case '<': if (mode.escapeLtGt) replacement = '<'; break; |
60 case '<': if (mode.escapeLtGt) replace = '<'; break; | 191 case '>': if (mode.escapeLtGt) replacement = '>'; break; |
61 case '>': if (mode.escapeLtGt) replace = '>'; break; | 192 case '/': if (mode.escapeSlash) replacement = '/'; break; |
62 case '/': if (mode.escapeSlash) replace = '/'; break; | |
63 } | 193 } |
64 if (replace != null) { | 194 if (replacement != null) { |
65 if (result == null) result = new StringBuffer(text.substring(start, i)); | 195 if (result == null) result = new StringBuffer(); |
66 result.write(replace); | 196 if (i > start) result.write(text.substring(start, i)); |
67 } else if (result != null) { | 197 result.write(replacement); |
68 result.write(ch); | 198 start = i + 1; |
69 } | 199 } |
70 } | 200 } |
71 | 201 if (result == null) return null; |
72 return result != null ? result.toString() : null; | 202 if (end > start) result.write(text.substring(start, end)); |
| 203 return result.toString(); |
73 } | 204 } |
74 | 205 |
75 StringConversionSink startChunkedConversion(Sink<String> sink) { | 206 StringConversionSink startChunkedConversion(Sink<String> sink) { |
76 if (sink is! StringConversionSink) { | 207 if (sink is! StringConversionSink) { |
77 sink = new StringConversionSink.from(sink); | 208 sink = new StringConversionSink.from(sink); |
78 } | 209 } |
79 return new _HtmlEscapeSink(this, sink); | 210 return new _HtmlEscapeSink(this, sink); |
80 } | 211 } |
81 } | 212 } |
82 | 213 |
83 class _HtmlEscapeSink extends StringConversionSinkBase { | 214 class _HtmlEscapeSink extends StringConversionSinkBase { |
84 final HtmlEscape _escape; | 215 final HtmlEscape _escape; |
85 final StringConversionSink _sink; | 216 final StringConversionSink _sink; |
86 | 217 |
87 _HtmlEscapeSink(this._escape, this._sink); | 218 _HtmlEscapeSink(this._escape, this._sink); |
88 | 219 |
89 void addSlice(String chunk, int start, int end, bool isLast) { | 220 void addSlice(String chunk, int start, int end, bool isLast) { |
90 var val = _escape._convert(chunk, start, end); | 221 var val = _escape._convert(chunk, start, end); |
91 if(val == null) { | 222 if(val == null) { |
92 _sink.addSlice(chunk, start, end, isLast); | 223 _sink.addSlice(chunk, start, end, isLast); |
93 } else { | 224 } else { |
94 _sink.add(val); | 225 _sink.add(val); |
95 if (isLast) _sink.close(); | 226 if (isLast) _sink.close(); |
96 } | 227 } |
97 } | 228 } |
98 | 229 |
99 void close() => _sink.close(); | 230 void close() { _sink.close(); } |
100 } | 231 } |
OLD | NEW |