OLD | NEW |
| (Empty) |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 part of dart.uri; | |
6 | |
/**
 * JavaScript-like URI encode/decode functions.
 * The documentation here borrows heavily from the original JavaScript
 * documentation on MDN at:
 * https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects
 */
13 | |
/**
 * A JavaScript-like URI encoder. Encodes the Uniform Resource Identifier
 * [uri] by replacing each character that may not appear literally with one,
 * two, three, or four `%XX` escape sequences representing the UTF-8 encoding
 * of the character (four escape sequences are only produced for characters
 * composed of two "surrogate" code units).
 *
 * This assumes that [uri] is a complete URI, so it does not encode the
 * reserved characters that have special meaning in a URI:
 * `#`, `;`, `,`, `/`, `?`, `:`, `@`, `&`, `=` and `$`.
 *
 * Unlike JavaScript's `encodeURI`, a space is written as `+`, and a literal
 * `+` is escaped as `%2B` so that it is not turned into a space when the
 * result is passed through [decodeUri].
 *
 * Returns the escaped URI.
 */
String encodeUri(String uri) {
  // Bit vector of 128 bits, stored as eight 16-bit words. Bit (c & 0x0f) of
  // word (c >> 4) is SET when the character with code c (0-127) is copied
  // to the output unescaped; a clear bit means the character is escaped.
  // The legend above each word names the characters whose bit is set.
  // Note that the bit for '+' (0x2b) is deliberately clear.
  const canonicalTable = const [
            //             LSB            MSB
            //              |              |
    0x0000, // 0x00 - 0x0f  0000000000000000
    0x0000, // 0x10 - 0x1f  0000000000000000
            //               ! #$ &'()* ,-./
    0xf7da, // 0x20 - 0x2f  0101101111101111
            //              0123456789:; = ?
    0xafff, // 0x30 - 0x3f  1111111111110101
            //              @ABCDEFGHIJKLMNO
    0xffff, // 0x40 - 0x4f  1111111111111111
            //              PQRSTUVWXYZ    _
    0x87ff, // 0x50 - 0x5f  1111111111100001
            //               abcdefghijklmno
    0xfffe, // 0x60 - 0x6f  0111111111111111
            //              pqrstuvwxyz   ~
    0x47ff  // 0x70 - 0x7f  1111111111100010
  ];
  return _uriEncode(canonicalTable, uri);
}
45 | |
/**
 * A JavaScript-like URI decoder, the counterpart of [encodeUri].
 *
 * Replaces each run of `%XX` escape sequences in [uri] with the characters
 * whose UTF-8 encoding it represents, and turns every `+` into a space.
 *
 * Unlike JavaScript's `decodeURI`, escape sequences for reserved characters
 * (for example `%2F`) are decoded as well: this function delegates to the
 * same decoder as [decodeUriComponent].
 *
 * Returns the unescaped URI. Throws an [ArgumentError] if an escape
 * sequence is truncated or contains a character that is not a hex digit.
 */
String decodeUri(String uri) => _uriDecode(uri);
57 | |
/**
 * A JavaScript-like URI component encoder. Encodes a URI [component] by
 * replacing each character that may not appear literally with one, two,
 * three, or four `%XX` escape sequences representing the UTF-8 encoding of
 * the character (four escape sequences are only produced for characters
 * composed of two "surrogate" code units).
 *
 * Only ASCII letters, digits and the characters `! ' ( ) * - . _ ~` are
 * left unescaped. A space is written as `+`.
 *
 * To avoid unexpected requests to the server, call this function on any
 * user-entered parameter that will be passed as part of a URI. For example,
 * a user could type "Thyme &time=again" for a variable `comment`. If the
 * value is inserted without escaping, the ampersand and the equal sign mark
 * a new key and value pair: instead of one key `comment` equal to
 * "Thyme &time=again" there are two keys, `comment` equal to "Thyme " and
 * `time` equal to "again". Escaped with this function the value becomes
 * `Thyme+%26time%3Dagain`.
 *
 * Returns the escaped string.
 */
String encodeUriComponent(String component) {
  // Bit vector of 128 bits, stored as eight 16-bit words. Bit (c & 0x0f) of
  // word (c >> 4) is SET when the character with code c (0-127) is copied
  // to the output unescaped; a clear bit means the character is escaped.
  // The legend above each word names the characters whose bit is set.
  const canonicalTable = const [
            //             LSB            MSB
            //              |              |
    0x0000, // 0x00 - 0x0f  0000000000000000
    0x0000, // 0x10 - 0x1f  0000000000000000
            //               !     '()*  -.
    0x6782, // 0x20 - 0x2f  0100000111100110
            //              0123456789
    0x03ff, // 0x30 - 0x3f  1111111111000000
            //               ABCDEFGHIJKLMNO
    0xfffe, // 0x40 - 0x4f  0111111111111111
            //              PQRSTUVWXYZ    _
    0x87ff, // 0x50 - 0x5f  1111111111100001
            //               abcdefghijklmno
    0xfffe, // 0x60 - 0x6f  0111111111111111
            //              pqrstuvwxyz   ~
    0x47ff  // 0x70 - 0x7f  1111111111100010
  ];
  return _uriEncode(canonicalTable, component);
}
96 | |
/**
 * A JavaScript-like URI component decoder.
 *
 * Decodes [encodedComponent], a URI component previously produced by
 * [encodeUriComponent] or by a similar routine, and returns the unescaped
 * string.
 */
String decodeUriComponent(String encodedComponent) =>
    _uriDecode(encodedComponent);
106 | |
/**
 * Shared implementation behind [encodeUri] and [encodeUriComponent].
 *
 * Walks the UTF-16 code units of [text] and, for each one:
 *
 *  * copies it unchanged when it is below 128 and its bit is set in
 *    [canonicalTable] (bit `c & 0x0f` of word `c >> 4`),
 *  * otherwise writes `+` when it is a space,
 *  * otherwise writes the UTF-8 encoding of the character as upper-case
 *    `%XX` escapes, first combining a surrogate pair into one code point.
 *
 * Returns the escaped string. Throws an [ArgumentError] if [text] contains
 * an unpaired surrogate code unit.
 */
String _uriEncode(List<int> canonicalTable, String text) {
  const String hexDigits = '0123456789ABCDEF';
  final StringBuffer result = new StringBuffer();
  for (int i = 0; i < text.length; i++) {
    int ch = text.codeUnitAt(i);
    if (ch < 128 && ((canonicalTable[ch >> 4] & (1 << (ch & 0x0f))) != 0)) {
      result.write(text[i]);
    } else if (ch == 0x20) {
      result.write("+");
    } else {
      if (ch >= 0xD800 && ch < 0xDC00) {
        // Lead (high) surrogate: it must be followed by a trail (low)
        // surrogate. A missing next unit is treated as 0, which fails the
        // range check below.
        ++i;
        int nextCh = text.length == i ? 0 : text.codeUnitAt(i);
        if (nextCh >= 0xDC00 && nextCh < 0xE000) {
          // Combine the pair into a single code point at or above U+10000.
          ch = 0x10000 + ((ch - 0xD800) << 10) + (nextCh - 0xDC00);
        } else {
          throw new ArgumentError('Malformed URI');
        }
      } else if (ch >= 0xDC00 && ch < 0xE000) {
        // Trail (low) surrogate with no preceding lead surrogate. Reject it
        // the same way as an unpaired lead surrogate instead of emitting
        // the bytes of a lone surrogate.
        throw new ArgumentError('Malformed URI');
      }
      for (int byte in codepointsToUtf8([ch])) {
        result.write('%${hexDigits[byte >> 4]}${hexDigits[byte & 0x0f]}');
      }
    }
  }
  return result.toString();
}
141 | |
/**
 * Reads the two hex digits of string [s] at positions [pos] and [pos] + 1
 * and returns the byte value (0-255) they spell. Upper- and lower-case
 * digits are both accepted.
 *
 * Throws an [ArgumentError] if either character is not a hex digit.
 */
int _hexCharPairToByte(String s, int pos) {
  // Value (0-15) of the single hex digit at [index].
  int nibbleAt(int index) {
    final int unit = s.codeUnitAt(index);
    if (unit >= 0x30 && unit <= 0x39) {
      // '0'-'9'
      return unit - 0x30;
    }
    // Setting bit 0x20 folds 'A'-'F' (0x41-0x46) onto 'a'-'f' (0x61-0x66).
    final int folded = unit | 0x20;
    if (folded < 0x61 || folded > 0x66) {
      throw new ArgumentError("Invalid URL encoding");
    }
    // 0x57 == 0x61 - 10, so 'a' maps to 10.
    return folded - 0x57;
  }

  final int high = nibbleAt(pos);
  final int low = nibbleAt(pos + 1);
  return high * 16 + low;
}
164 | |
/**
 * Shared implementation behind [decodeUri] and [decodeUriComponent].
 *
 * Copies [text] to the result, turning each `+` into a space and each run
 * of consecutive `%XX` escape sequences into the string obtained by
 * decoding the escaped bytes as UTF-8.
 *
 * Returns the unescaped string. Throws an [ArgumentError] if a `%` is not
 * followed by two characters, or if those characters are not hex digits.
 */
String _uriDecode(String text) {
  final int end = text.length;
  final StringBuffer out = new StringBuffer();
  // Bytes of the escape run currently being collected.
  final List<int> bytes = <int>[];
  int pos = 0;
  while (pos < end) {
    final String current = text[pos];
    if (current != '%') {
      out.write(current == '+' ? " " : current);
      pos++;
      continue;
    }
    // Gather the whole run of adjacent escapes before decoding, because a
    // single character may be spread over several escaped bytes.
    bytes.clear();
    do {
      pos++; // Step over the '%'.
      if (pos > end - 2) {
        throw new ArgumentError('Truncated URI');
      }
      bytes.add(_hexCharPairToByte(text, pos));
      pos += 2;
    } while (pos < end && text[pos] == '%');
    out.write(decodeUtf8(bytes));
  }
  return out.toString();
}
198 | |
OLD | NEW |