OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 part of dart.core; | |
6 | |
7 /** | |
8 * A parsed URI, as specified by RFC-3986, http://tools.ietf.org/html/rfc3986. | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
More documentation here, please.
Describe the part
Søren Gjesse
2013/05/28 13:33:26
I absolutely agree. However I will like to postpon
| |
9 */ | |
10 class Uri { | |
11 int _port; | |
12 | |
13 /** | |
14 * Returns the scheme. | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Returns the scheme component of this URI.
General
Søren Gjesse
2013/05/28 13:33:26
Done.
| |
15 * | |
16 * Returns the empty string if there is no scheme. | |
17 */ | |
18 final String scheme; | |
19 | |
20 /** | |
21 * Returns the authority. | |
22 * | |
23 * The authority is formatted from the [userInfo], [host] and [port] | |
24 * components. | |
25 * | |
26 * Returns the empty string if there is no authority. | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Why empty string and not, e.g., null?
(I assume it
Søren Gjesse
2013/05/28 13:33:26
This is not changed from the current behavior in d
| |
27 */ | |
28 String get authority { | |
29 if (!hasAuthority) return ""; | |
30 var sb = new StringBuffer(); | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Two spaces after "=".
Søren Gjesse
2013/05/28 13:33:26
Done.
| |
31 _writeAuthority(sb); | |
32 return sb.toString(); | |
33 } | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Add empty line.
Søren Gjesse
2013/05/28 13:33:26
Done.
| |
34 /** | |
35 * Returns the user info part of the authority. | |
36 * | |
37 * Returns the empty string if there is no user info in the authority. | |
38 */ | |
39 final String userInfo; | |
40 | |
41 /** | |
42 * Returns the host part of the authority. | |
43 * | |
44 * Returns the empty string if there is no authority and hence no host. | |
45 */ | |
46 final String host; | |
47 | |
48 /** | |
49 * Returns the port part of the authority. | |
50 * | |
51 * Returns 0 if there is no port in the authority. | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
I can see "port zero is a wildcard" as precedented
Søren Gjesse
2013/05/28 13:33:26
Again this is the behavior from the current dart:u
| |
52 */ | |
53 int get port => _port; | |
54 | |
55 /** | |
56 * Returns the path. | |
57 * | |
58 * The returned path is encoded. To get direct access to the decoded | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Encoded how, and encoded from what?
The version th
Søren Gjesse
2013/05/28 13:33:26
I agree. I think adding a general comment above ab
| |
59 * path use [pathSegments]. | |
60 * | |
61 * Returns the empty string if there is no path. | |
62 */ | |
63 final String path; | |
64 | |
65 /** | |
66 * Returns the URI query. The returned query is encoded. To get | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Ditto for encoded: What has been encoded and how?
Søren Gjesse
2013/05/28 13:33:26
See above.
| |
67 * direct access to the decoded query use [queryParameters]. | |
68 * | |
69 * Returns the empty string if there is no query. | |
70 */ | |
71 final String query; | |
72 | |
73 /** | |
74 * Returns the fragment. | |
75 * | |
76 * Returns the empty string if there is no fragment. | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Is there a way to distinguish:
foo:bar#
from
f
Søren Gjesse
2013/05/28 13:33:26
No and I don't think so. We could use null vs. the
| |
77 */ | |
78 final String fragment; | |
79 | |
80 /** | |
81 * Creates a new URI object by parsing a URI string. | |
82 */ | |
83 static Uri parse(String uri) => new Uri._fromMatch(_splitRe.firstMatch(uri)); | |
84 | |
85 Uri._fromMatch(Match m) : | |
86 this(scheme: _emptyIfNull(m[_COMPONENT_SCHEME]), | |
87 userInfo: _emptyIfNull(m[_COMPONENT_USER_INFO]), | |
88 host: _eitherOf( | |
89 m[_COMPONENT_HOST], m[_COMPONENT_HOST_IPV6]), | |
90 port: _parseIntOrZero(m[_COMPONENT_PORT]), | |
91 path: _emptyIfNull(m[_COMPONENT_PATH]), | |
92 query: _emptyIfNull(m[_COMPONENT_QUERY_DATA]), | |
93 fragment: _emptyIfNull(m[_COMPONENT_FRAGMENT])); | |
94 | |
95 /** | |
96 * Create a new URI from its components. | |
97 * | |
98 * Each component is set through a named argument. Any number of | |
99 * components can be provided. The default value for the components | |
100 * not provided is the empty string, except for [port] which has a | |
101 * default value of 0. The [path] and [query] components can be set | |
102 * using two different named arguments. | |
103 * | |
104 * The scheme component is set through [scheme]. The scheme is | |
105 * normalized to all lowercase letters. | |
106 * | |
107 * The user info part of the authority component is set through | |
108 * [userInfo]. | |
109 * | |
110 * The host part of the authority component is set through | |
111 * [host]. The host can either be a hostname, an IPv4 address or an | |
112 * IPv6 address, contained in '[' and ']'. If the host contains a | |
113 * ':' character, the '[' and ']' are added if not already provided. | |
114 * | |
115 * The port part of the authority component is set through | |
116 * [port]. The port is normalized for scheme http and https, where | |
117 * the default port 80 and 443 respectively is replaced by 0. | |
118 * | |
119 * The path component is set through either [path] or | |
120 * [pathSegments]. When [path] is used, the provided string is | |
121 * expected to be fully percent-encoded, and is used in its literal | |
122 * form. When [pathSegments] is used, each of the provided segments | |
123 * is percent-encoded and joined using the forward slash | |
124 * separator. The percent-encoding of the path segments encodes all | |
125 * characters except for the unreserved characters and the following | |
126 * list of characters: `!$&'()*+,;=:@`. | |
127 * | |
128 * The query component is set through either [query] or | |
129 * [queryParameters]. When [query] is used the provided string is | |
130 * expected to be fully percent-encoded and is used in its literal | |
131 * form. When [queryParameters] is used the query is built from the | |
132 * provided map. Each key and value in the map is percent-encoded | |
133 * and joined using equal and ampersand characters. The | |
134 * percent-encoding of the keys and values encodes all characters | |
135 * except for the unreserved characters. | |
136 * | |
137 * The fragment component is set through [fragment]. | |
138 */ | |
139 Uri({scheme, | |
140 this.userInfo: "", | |
141 this.host: "", | |
142 port: 0, | |
143 String path, | |
144 List<String> pathSegments, | |
145 String query, | |
146 Map<String, String> queryParameters, | |
147 fragment: ""}) : | |
148 scheme = _makeScheme(scheme), | |
149 path = _makePath(path, pathSegments), | |
150 query = _makeQuery(query, queryParameters), | |
151 fragment = _makeFragment(fragment) { | |
152 // Drop default ports; compare the normalized field, not the raw parameter. | |
153 if (this.scheme == "http" && port == 80) { | |
154 _port = 0; | |
155 } else if (this.scheme == "https" && port == 443) { | |
156 _port = 0; | |
157 } else { | |
158 _port = port; | |
159 } | |
160 } | |
161 | |
162 /** | |
163 * Returns the URI path split into its segments. Each of the | |
164 * segments in the returned list has been decoded. If the path is | |
165 * empty the empty list will be returned. | |
166 */ | |
167 List<String> get pathSegments { | |
168 if (path == "") return const<String>[]; | |
169 return path.split("/").map(Uri.decodeComponent).toList(growable: false); | |
170 } | |
171 | |
172 /** | |
173 * Returns the URI query split into a map according to the rules | |
174 * specified for FORM post in the HTML 4.01 specification. Each key | |
175 * and value in the returned map has been decoded. If there is no | |
176 * query the empty map will be returned. | |
177 */ | |
178 Map<String, String> get queryParameters { | |
179 return query.split("&").fold({}, (map, element) { | |
180 int index = element.indexOf("="); | |
181 if (index == -1) { // No '=': the element is a key with an empty value. | |
182 if (!element.isEmpty) map[Uri.decodeQueryComponent(element)] = ""; | |
183 } else if (index != 0) { // Elements with an empty key are skipped. | |
184 var key = element.substring(0, index); | |
185 var value = element.substring(index + 1); | |
186 map[Uri.decodeQueryComponent(key)] = decodeQueryComponent(value); | |
187 } | |
188 return map; | |
189 }); | |
190 } | |
191 | |
192 static String _makeScheme(String scheme) { | |
193 bool isSchemeLowerCharacter(int ch) { | |
194 return ch < 128 && | |
195 ((_schemeLowerTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); | |
196 } | |
197 | |
198 bool isSchemeCharacter(int ch) { | |
199 return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); | |
200 } | |
201 | |
202 if (scheme == null) return ""; | |
203 bool allLowercase = true; | |
204 int length = scheme.length; | |
205 for (int i = 0; i < length; i++) { | |
206 int codeUnit = scheme.codeUnitAt(i); | |
207 if (!isSchemeLowerCharacter(codeUnit)) { | |
208 if (isSchemeCharacter(codeUnit)) { | |
209 allLowercase = false; | |
210 } else { | |
211 throw new ArgumentError('Illegal scheme: $scheme'); | |
212 } | |
213 } | |
214 } | |
215 | |
216 return allLowercase ? scheme : scheme.toLowerCase(); | |
217 } | |
218 | |
219 static String _makePath(String path, List<String> pathSegments) { | |
220 if (path == null && pathSegments == null) return ""; | |
221 if (path != null && pathSegments != null) { | |
222 throw new ArgumentError('Both path and pathSegments specified'); | |
223 } | |
224 if (path != null) return _normalize(path); | |
225 | |
226 return pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/"); | |
227 } | |
228 | |
229 static String _makeQuery(String query, Map<String, String> queryParameters) { | |
230 if (query == null && queryParameters == null) return ""; | |
231 if (query != null && queryParameters != null) { | |
232 throw new ArgumentError('Both query and queryParameters specified'); | |
233 } | |
234 if (query != null) return _normalize(query); | |
235 | |
236 var result = new StringBuffer(); | |
237 var first = true; | |
238 queryParameters.forEach((key, value) { | |
239 if (!first) { | |
240 result.write("&"); | |
241 } | |
242 first = false; | |
243 result.write(Uri.encodeQueryComponent(key)); | |
244 if (value != null && !value.isEmpty) { | |
245 result.write("="); | |
246 result.write(Uri.encodeQueryComponent(value)); | |
247 } | |
248 }); | |
249 return result.toString(); | |
250 } | |
251 | |
252 static String _makeFragment(String fragment) { | |
253 if (fragment == null) return ""; | |
254 return _normalize(fragment); | |
255 } | |
256 | |
257 static String _normalize(String component) { // Canonicalizes %-escapes. | |
258 bool isNormalizedHexDigit(int digit) { | |
259 return (_ZERO <= digit && digit <= _NINE) || | |
260 (_UPPER_CASE_A <= digit && digit <= _UPPER_CASE_F); | |
261 } | |
262 | |
263 bool isLowerCaseHexDigit(int digit) { | |
264 return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F; | |
265 } | |
266 | |
267 bool isUnreserved(int ch) { | |
268 return ch < 128 && | |
269 ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); | |
270 } | |
271 | |
272 int normalizeHexDigit(int index) { | |
273 var codeUnit = component.codeUnitAt(index); | |
274 if (isLowerCaseHexDigit(codeUnit)) { | |
275 return codeUnit - 0x20; | |
276 } else if (!isNormalizedHexDigit(codeUnit)) { | |
277 throw new ArgumentError("Invalid URI component: $component"); | |
278 } else { | |
279 return codeUnit; | |
280 } | |
281 } | |
282 | |
283 int decodeHexDigitPair(int index) { | |
284 int byte = 0; | |
285 for (int i = 0; i < 2; i++) { | |
286 var codeUnit = component.codeUnitAt(index + i); | |
287 if (_ZERO <= codeUnit && codeUnit <= _NINE) { | |
288 byte = byte * 16 + codeUnit - _ZERO; | |
289 } else { | |
290 // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66). | |
291 codeUnit |= 0x20; | |
292 if (_LOWER_CASE_A <= codeUnit && | |
293 codeUnit <= _LOWER_CASE_F) { | |
294 byte = byte * 16 + codeUnit - _LOWER_CASE_A + 10; | |
295 } else { | |
296 throw new ArgumentError( | |
297 "Invalid percent-encoding in URI component: $component"); | |
298 } | |
299 } | |
300 } | |
301 return byte; | |
302 } | |
303 | |
304 // Build the result lazily: it stays null until an escape must be rewritten. | |
305 StringBuffer result; | |
306 int length = component.length; | |
307 int index = 0; | |
308 int prevIndex = 0; | |
309 while (index < length) { | |
310 | |
311 // Copy the not yet copied part [prevIndex, index) to the result. | |
312 fillResult() { | |
313 if (result == null) { | |
314 assert(prevIndex == 0); | |
315 result = new StringBuffer(component.substring(prevIndex, index)); | |
316 } else { | |
317 result.write(component.substring(prevIndex, index)); | |
318 } | |
319 } | |
320 | |
321 // Normalize percent encoding to uppercase and don't encode | |
322 // unreserved characters. | |
323 if (component.codeUnitAt(index) == _PERCENT) { | |
324 if (length <= index + 2) { // '%' must be followed by two hex digits. | |
325 throw new ArgumentError( | |
326 "Invalid percent-encoding in URI component: $component"); | |
327 } | |
328 | |
329 var codeUnit1 = component.codeUnitAt(index + 1); | |
330 var codeUnit2 = component.codeUnitAt(index + 2); | |
331 var decodedCodeUnit = decodeHexDigitPair(index + 1); | |
332 if (isNormalizedHexDigit(codeUnit1) && | |
333 isNormalizedHexDigit(codeUnit2) && | |
334 !isUnreserved(decodedCodeUnit)) { | |
335 index += 3; | |
336 } else { | |
337 fillResult(); | |
338 if (isUnreserved(decodedCodeUnit)) { | |
339 result.writeCharCode(decodedCodeUnit); | |
340 } else { | |
341 result.write("%"); | |
342 result.writeCharCode(normalizeHexDigit(index + 1)); | |
343 result.writeCharCode(normalizeHexDigit(index + 2)); | |
344 } | |
345 index += 3; | |
346 prevIndex = index; | |
347 } | |
348 } else { | |
349 index++; | |
350 } | |
351 } | |
352 assert(index == length); | |
353 if (result == null) return component; | |
354 result.write(component.substring(prevIndex)); // Tail after the last rewrite. | |
355 return result.toString(); | |
356 } | |
357 | |
358 static String _emptyIfNull(String val) => val != null ? val : ''; | |
359 | |
360 static int _parseIntOrZero(String val) { | |
361 if (val != null && val != '') { | |
362 return int.parse(val); | |
363 } else { | |
364 return 0; | |
365 } | |
366 } | |
367 | |
368 static String _eitherOf(String val1, String val2) { | |
369 if (val1 != null) return val1; | |
370 if (val2 != null) return val2; | |
371 return ''; | |
372 } | |
373 | |
374 // NOTE: This code was ported from: closure-library/closure/goog/uri/utils.js | |
375 static final RegExp _splitRe = new RegExp( | |
376 '^' | |
377 '(?:' | |
378 '([^:/?#.]+)' // scheme - ignore special characters | |
379 // used by other URL parts such as :, | |
380 // ?, /, #, and . | |
381 ':)?' | |
382 '(?://' | |
383 '(?:([^/?#]*)@)?' // userInfo | |
384 '(?:' | |
385 r'([\w\d\-\u0100-\uffff.%]*)' | |
386 // host - restrict to letters, | |
387 // digits, dashes, dots, percent | |
388 // escapes, and unicode characters. | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Why are we restricting more than the RegExp in the
Søren Gjesse
2013/05/28 13:33:26
Currently RegExp is also parsing the user info, ho
| |
389 '|' | |
390 // TODO(ajohnsen): Only allow a max number of parts? | |
391 r'\[([A-Fa-f0-9:.]*)\])' | |
392 // IPv6 host - restrict to hex, | |
393 // dot and colon. | |
394 '(?::([0-9]+))?' // port | |
395 ')?' | |
396 r'([^?#[]+)?' // path | |
397 r'(?:\?([^#]*))?' // query | |
398 '(?:#(.*))?' // fragment | |
399 r'$'); | |
400 | |
401 static const _COMPONENT_SCHEME = 1; | |
402 static const _COMPONENT_USER_INFO = 2; | |
403 static const _COMPONENT_HOST = 3; | |
404 static const _COMPONENT_HOST_IPV6 = 4; | |
405 static const _COMPONENT_PORT = 5; | |
406 static const _COMPONENT_PATH = 6; | |
407 static const _COMPONENT_QUERY_DATA = 7; | |
408 static const _COMPONENT_FRAGMENT = 8; | |
409 | |
410 /** | |
411 * Returns `true` if the URI is absolute. | |
412 */ | |
413 bool get isAbsolute { | |
414 if ("" == scheme) return false; | |
415 if ("" != fragment) return false; | |
416 return true; | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
just:
return scheme != "" && fragment == "";
Mor
Søren Gjesse
2013/05/28 13:33:26
Change the formatting.
The spec says "...calls fo
| |
417 } | |
418 | |
419 String _merge(String base, String reference) { | |
420 if (base == "") return "/$reference"; | |
421 return "${base.substring(0, base.lastIndexOf("/") + 1)}$reference"; | |
422 } | |
423 | |
424 String _removeDotSegments(String path) { | |
425 List<String> output = []; | |
426 bool appendSlash = false; | |
427 for (String segment in path.split("/")) { | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Seems like overkill to split and rebuild every tim
Søren Gjesse
2013/05/28 13:33:26
Done.
| |
428 appendSlash = false; | |
429 if (segment == "..") { | |
430 if (!output.isEmpty && | |
431 ((output.length != 1) || (output[0] != ""))) output.removeLast(); | |
432 appendSlash = true; | |
433 } else if ("." == segment) { | |
434 appendSlash = true; | |
435 } else { | |
436 output.add(segment); | |
437 } | |
438 } | |
439 if (appendSlash) output.add(""); | |
440 return output.join("/"); | |
441 } | |
442 | |
443 Uri resolve(String uri) { | |
444 return resolveUri(Uri.parse(uri)); | |
445 } | |
446 | |
447 Uri resolveUri(Uri reference) { | |
448 // From RFC 3986. | |
449 String targetScheme; | |
450 String targetUserInfo; | |
451 String targetHost; | |
452 int targetPort; | |
453 String targetPath; | |
454 String targetQuery; | |
455 if (reference.scheme != "") { | |
456 targetScheme = reference.scheme; | |
457 targetUserInfo = reference.userInfo; | |
458 targetHost = reference.host; | |
459 targetPort = reference.port; | |
460 targetPath = _removeDotSegments(reference.path); | |
461 targetQuery = reference.query; | |
462 } else { | |
463 if (reference.hasAuthority) { | |
464 targetUserInfo = reference.userInfo; | |
465 targetHost = reference.host; | |
466 targetPort = reference.port; | |
467 targetPath = _removeDotSegments(reference.path); | |
468 targetQuery = reference.query; | |
469 } else { | |
470 if (reference.path == "") { | |
471 targetPath = this.path; | |
472 if (reference.query != "") { | |
473 targetQuery = reference.query; | |
474 } else { | |
475 targetQuery = this.query; | |
476 } | |
477 } else { | |
478 if (reference.path.startsWith("/")) { | |
479 targetPath = _removeDotSegments(reference.path); | |
480 } else { | |
481 targetPath = _removeDotSegments(_merge(this.path, reference.path)); | |
482 } | |
483 targetQuery = reference.query; | |
484 } | |
485 targetUserInfo = this.userInfo; | |
486 targetHost = this.host; | |
487 targetPort = this.port; | |
488 } | |
489 targetScheme = this.scheme; | |
490 } | |
491 return new Uri(scheme: targetScheme, | |
492 userInfo: targetUserInfo, | |
493 host: targetHost, | |
494 port: targetPort, | |
495 path: targetPath, | |
496 query: targetQuery, | |
497 fragment: reference.fragment); | |
498 } | |
499 | |
500 bool get hasAuthority => host != ""; | |
501 | |
502 /** | |
503 * Returns the origin of the URI in the form scheme://host:port for the | |
504 * schemes http and https. | |
505 * | |
506 * Throws StateError if the scheme is not http or https. | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Just say "It is an error if ...".
Quote "http" and
Søren Gjesse
2013/05/28 13:33:26
Done.
| |
507 * | |
508 * See: http://www.w3.org/TR/2011/WD-html5-20110405/origin-0.html#origin | |
509 */ | |
510 String get origin { | |
511 if (scheme == "" || host == null || host == "") { | |
512 throw new StateError("Cannot use origin without a scheme: $this"); | |
513 } | |
514 if (scheme != "http" && scheme != "https") { | |
515 throw new StateError( | |
516 "Origin is only applicable schemes http and https: $this"); | |
517 } | |
518 if (port == 0) return "$scheme://$host"; | |
519 return "$scheme://$host:$port"; | |
520 } | |
521 | |
522 void _writeAuthority(StringSink ss) { | |
523 _addIfNonEmpty(ss, userInfo, userInfo, "@"); | |
524 ss.write(host == null ? "null" : // Bracket IPv6 hosts unless already bracketed. | |
525 host.contains(':') && !host.startsWith('[') ? '[$host]' : host); | |
526 if (port != 0) { | |
527 ss.write(":"); | |
528 ss.write(port.toString()); | |
529 } | |
530 } | |
531 | |
532 String toString() { | |
533 StringBuffer sb = new StringBuffer(); | |
534 _addIfNonEmpty(sb, scheme, scheme, ':'); | |
535 if (hasAuthority || (scheme == "file")) { | |
536 sb.write("//"); | |
537 _writeAuthority(sb); | |
538 } | |
539 sb.write(path); | |
540 _addIfNonEmpty(sb, query, "?", query); | |
541 _addIfNonEmpty(sb, fragment, "#", fragment); | |
542 return sb.toString(); | |
543 } | |
544 | |
545 bool operator==(other) { | |
546 if (other is! Uri) return false; | |
547 Uri uri = other; | |
548 return scheme == uri.scheme && | |
549 userInfo == uri.userInfo && | |
550 host == uri.host && | |
551 port == uri.port && | |
552 path == uri.path && | |
553 query == uri.query && | |
554 fragment == uri.fragment; | |
555 } | |
556 | |
557 int get hashCode { | |
558 int combine(part, current) { | |
559 // The sum is truncated to 30 bits to make sure it fits into a Smi. | |
560 return (current * 31 + part.hashCode) & 0x3FFFFFFF; | |
561 } | |
562 return combine(scheme, combine(userInfo, combine(host, combine(port, | |
563 combine(path, combine(query, combine(fragment, 1))))))); | |
564 } | |
565 | |
566 static void _addIfNonEmpty(StringBuffer sb, String test, | |
567 String first, String second) { | |
568 if ("" != test) { | |
569 sb.write(first == null ? "null" : first); | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Since null.toString() == "null", just write
sb.w
Søren Gjesse
2013/05/28 13:33:26
Done.
| |
570 sb.write(second == null ? "null" : second); | |
571 } | |
572 } | |
573 | |
574 /** | |
575 * Encode the string [component] using percent-encoding to make it | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
I prefer "URL encoding" to "percent-encoding".
Is
Søren Gjesse
2013/05/28 13:33:26
I would like to address the comments in a separate
| |
576 * safe for literal use as a URI component. | |
577 * | |
578 * All characters except uppercase and lowercase letters, digits and | |
579 * the characters `-_.!~*'()` are percent-encoded. This is the | |
580 * set of characters specified in RFC 2396 and which is | |
581 * specified for encodeURIComponent in ECMA-262 version 5.1. | |
582 * | |
583 * When manually encoding path segments or query components remember | |
584 * to encode each part separately before building the path or query | |
585 * string. | |
586 * | |
587 * For encoding the query part consider using | |
588 * [encodeQueryComponent]. | |
589 * | |
590 * To avoid the need for explicitly encoding use the [pathSegments] | |
591 * and [queryParameters] optional named arguments when constructing | |
592 * a Uri. | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Uri -> [Uri].
Søren Gjesse
2013/05/28 13:33:26
Done.
| |
593 */ | |
594 static String encodeComponent(String component) { | |
595 return _uriEncode(_unreserved2396Table, component); | |
596 } | |
597 | |
598 /** | |
599 * Encode the string [component] according to the HTML 4.01 rules | |
600 * for encoding the posting of a HTML form as a query string | |
601 * component. | |
602 * | |
603 * Spaces will be replaced with plus and all characters except for | |
604 * uppercase and lowercase letters, decimal digits and the | |
605 * characters `-._~` are percent-encoded. Note that the set of | |
606 * characters encoded is a superset of what HTML 4.01 says as it | |
607 * refers to RFC 1738 for reserved characters. | |
608 * | |
609 * When manually encoding query components remember to encode each | |
610 * part separately before building the query string. | |
611 * | |
612 * To avoid the need for explicitly encoding the query use the | |
613 * [queryParameters] optional named arguments when constructing a | |
614 * URI. | |
615 * | |
616 * See http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2 for more | |
617 * details. | |
618 */ | |
619 static String encodeQueryComponent(String component) { | |
620 return _uriEncode(_unreservedTable, component, spaceToPlus: true); | |
621 } | |
622 | |
623 /** | |
624 * Decodes the percent-encoding in [encodedComponent]. | |
625 * | |
626 * Note that decoding a URI component might change its meaning as | |
627 * some of the decoded characters could be characters which are | |
628 * delimiters for a given URI component type. Always split a URI | |
629 * component using the delimiters for the component before decoding | |
630 * the individual parts. | |
631 * | |
632 * For handling the [path] and [query] components consider using | |
633 * [pathSegments] and [queryParameters] to get the separated and | |
634 * decoded component. | |
635 */ | |
636 static String decodeComponent(String encodedComponent) { | |
637 return _uriDecode(encodedComponent); | |
638 } | |
639 | |
640 static String decodeQueryComponent(String encodedComponent) { | |
641 return _uriDecode(encodedComponent, plusToSpace: true); | |
642 } | |
643 | |
644 /** | |
645 * Encode the string [uri] using percent-encoding to make it | |
646 * safe for literal use as a full URI. | |
647 * | |
648 * All characters except uppercase and lowercase letters, digits and | |
649 * the characters `!#$&'()*+,-./:;=?@_~` are percent-encoded. This | |
650 * is the set of characters specified in ECMA-262 version 5.1 for | |
651 * the encodeURI function. | |
652 */ | |
653 static String encodeFull(String uri) { | |
654 return _uriEncode(_encodeFullTable, uri); | |
655 } | |
656 | |
657 /** | |
658 * Decodes the percent-encoding in [uri]. | |
659 * | |
660 * Note that decoding a full URI might change its meaning as some of | |
661 * the decoded characters could be reserved characters. In most | |
662 * cases an encoded URI should be parsed into components using | |
663 * [Uri.parse] before decoding the separate components. | |
664 */ | |
665 static String decodeFull(String uri) { | |
666 return _uriDecode(uri); | |
667 } | |
668 | |
669 // Frequently used character codes. | |
670 static const int _PERCENT = 0x25; | |
671 static const int _ZERO = 0x30; | |
672 static const int _NINE = 0x39; | |
673 static const int _UPPER_CASE_A = 0x41; | |
674 static const int _UPPER_CASE_F = 0x46; | |
675 static const int _LOWER_CASE_A = 0x61; | |
676 static const int _LOWER_CASE_F = 0x66; | |
677 | |
678 /** | |
679 * This is the internal implementation of JavaScript's encodeURI function. | |
680 * It encodes all characters in the string [text] except for those | |
681 * that appear in [canonicalTable], and returns the escaped string. | |
682 */ | |
683 static String _uriEncode(List<int> canonicalTable, | |
684 String text, | |
685 {bool spaceToPlus: false}) { | |
686 byteToHex(int v) { | |
687 final String hex = '0123456789ABCDEF'; | |
688 return '%${hex[v >> 4]}${hex[v & 0x0f]}'; | |
689 } | |
690 | |
691 StringBuffer result = new StringBuffer(); | |
692 for (int i = 0; i < text.length; i++) { | |
693 int ch = text.codeUnitAt(i); | |
694 if (ch < 128 && ((canonicalTable[ch >> 4] & (1 << (ch & 0x0f))) != 0)) { | |
695 result.write(text[i]); | |
696 } else if (spaceToPlus && text[i] == " ") { | |
697 result.write("+"); | |
698 } else { | |
699 if (ch >= 0xD800 && ch < 0xDC00) { | |
700 // High (lead) surrogate. The next code unit must be a low (trail) surrogate. | |
701 ++i; | |
702 int nextCh = text.length == i ? 0 : text.codeUnitAt(i); | |
703 if (nextCh >= 0xDC00 && nextCh < 0xE000) { | |
704 // Combine the surrogate pair into one code point at or above U+10000. | |
705 ch = 0x10000 + ((ch - 0xD800) << 10) + (nextCh - 0xDC00); | |
706 } else { | |
707 throw new ArgumentError('Malformed URI'); | |
708 } | |
709 } | |
710 for (int codepoint in codepointsToUtf8([ch])) { | |
711 result.write(byteToHex(codepoint)); | |
712 } | |
713 } | |
714 } | |
715 return result.toString(); | |
716 } | |
717 | |
718 /** | |
719 * Convert a byte (2 character hex sequence) in string [s] starting | |
720 * at position [pos] to its ordinal value | |
721 */ | |
722 static int _hexCharPairToByte(String s, int pos) { | |
723 int byte = 0; | |
724 for (int i = 0; i < 2; i++) { | |
725 var charCode = s.codeUnitAt(pos + i); | |
726 if (0x30 <= charCode && charCode <= 0x39) { | |
727 byte = byte * 16 + charCode - 0x30; | |
728 } else { | |
729 // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66). | |
730 charCode |= 0x20; | |
731 if (0x61 <= charCode && charCode <= 0x66) { | |
732 byte = byte * 16 + charCode - 0x57; | |
733 } else { | |
734 throw new ArgumentError("Invalid URL encoding"); | |
735 } | |
736 } | |
737 } | |
738 return byte; | |
739 } | |
740 | |
741 /** | |
742 * A JavaScript-like decodeURI function. It unescapes the string [text] and | |
743 * returns the unescaped string. | |
744 */ | |
745 static String _uriDecode(String text, {bool plusToSpace: false}) { | |
746 StringBuffer result = new StringBuffer(); | |
747 List<int> codepoints = new List<int>(); | |
748 for (int i = 0; i < text.length;) { | |
749 String ch = text[i]; | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Consider using text.codeUnitAt(i) and work with co
Søren Gjesse
2013/05/28 13:33:26
Done.
| |
750 if (ch != '%') { | |
751 if (plusToSpace && ch == '+') { | |
752 result.write(" "); | |
753 } else { | |
754 result.write(ch); | |
755 } | |
756 i++; | |
757 } else { | |
758 codepoints.clear(); | |
759 while (ch == '%') { | |
760 if (++i > text.length - 2) { | |
761 throw new ArgumentError('Truncated URI'); | |
762 } | |
763 codepoints.add(_hexCharPairToByte(text, i)); | |
764 i += 2; | |
765 if (i == text.length) | |
766 break; | |
Lasse Reichstein Nielsen
2013/05/28 08:45:33
break on line above, or add braces.
Søren Gjesse
2013/05/28 13:33:26
Done.
| |
767 ch = text[i]; | |
768 } | |
769 result.write(decodeUtf8(codepoints)); | |
770 } | |
771 } | |
772 return result.toString(); | |
773 } | |
774 | |
775 // Tables of char-codes organized as a bit vector of 128 bits where | |
776 // each bit indicates whether a character code in the range 0-127 | |
777 // needs to be escaped or not. | |
778 | |
779 // The unreserved characters of RFC 3986. | |
780 static const _unreservedTable = const [ | |
781 // LSB MSB | |
782 // | | | |
783 0x0000, // 0x00 - 0x0f 0000000000000000 | |
784 0x0000, // 0x10 - 0x1f 0000000000000000 | |
785 // -. | |
786 0x6000, // 0x20 - 0x2f 0000000000000110 | |
787 // 0123456789 | |
788 0x03ff, // 0x30 - 0x3f 1111111111000000 | |
789 // ABCDEFGHIJKLMNO | |
790 0xfffe, // 0x40 - 0x4f 0111111111111111 | |
791 // PQRSTUVWXYZ _ | |
792 0x87ff, // 0x50 - 0x5f 1111111111100001 | |
793 // abcdefghijklmno | |
794 0xfffe, // 0x60 - 0x6f 0111111111111111 | |
795 // pqrstuvwxyz ~ | |
796 0x47ff]; // 0x70 - 0x7f 1111111111100010 | |
797 | |
798 // The unreserved characters of RFC 2396. | |
799 static const _unreserved2396Table = const [ | |
800 // LSB MSB | |
801 // | | | |
802 0x0000, // 0x00 - 0x0f 0000000000000000 | |
803 0x0000, // 0x10 - 0x1f 0000000000000000 | |
804 // ! '()* -. | |
805 0x6782, // 0x20 - 0x2f 0100000111100110 | |
806 // 0123456789 | |
807 0x03ff, // 0x30 - 0x3f 1111111111000000 | |
808 // ABCDEFGHIJKLMNO | |
809 0xfffe, // 0x40 - 0x4f 0111111111111111 | |
810 // PQRSTUVWXYZ _ | |
811 0x87ff, // 0x50 - 0x5f 1111111111100001 | |
812 // abcdefghijklmno | |
813 0xfffe, // 0x60 - 0x6f 0111111111111111 | |
814 // pqrstuvwxyz ~ | |
815 0x47ff]; // 0x70 - 0x7f 1111111111100010 | |
816 | |
817 // Table of reserved characters specified by ECMAScript 5. | |
818 static const _encodeFullTable = const [ | |
819 // LSB MSB | |
820 // | | | |
821 0x0000, // 0x00 - 0x0f 0000000000000000 | |
822 0x0000, // 0x10 - 0x1f 0000000000000000 | |
823 // ! #$ &'()*+,-./ | |
824 0xffda, // 0x20 - 0x2f 0101101111111111 | |
825 // 0123456789:; = ? | |
826 0xafff, // 0x30 - 0x3f 1111111111110101 | |
827 // @ABCDEFGHIJKLMNO | |
828 0xffff, // 0x40 - 0x4f 1111111111111111 | |
829 // PQRSTUVWXYZ _ | |
830 0x87ff, // 0x50 - 0x5f 1111111111100001 | |
831 // abcdefghijklmno | |
832 0xfffe, // 0x60 - 0x6f 0111111111111111 | |
833 // pqrstuvwxyz ~ | |
834 0x47ff]; // 0x70 - 0x7f 1111111111100010 | |
835 | |
836 // Characters allowed in the scheme. | |
837 static const _schemeTable = const [ | |
838 // LSB MSB | |
839 // | | | |
840 0x0000, // 0x00 - 0x0f 0000000000000000 | |
841 0x0000, // 0x10 - 0x1f 0000000000000000 | |
842 // + -. | |
843 0x6800, // 0x20 - 0x2f 0000000000010110 | |
844 // 0123456789 | |
845 0x03ff, // 0x30 - 0x3f 1111111111000000 | |
846 // ABCDEFGHIJKLMNO | |
847 0xfffe, // 0x40 - 0x4f 0111111111111111 | |
848 // PQRSTUVWXYZ | |
849 0x07ff, // 0x50 - 0x5f 1111111111100000 | |
850 // abcdefghijklmno | |
851 0xfffe, // 0x60 - 0x6f 0111111111111111 | |
852 // pqrstuvwxyz | |
853 0x07ff]; // 0x70 - 0x7f 1111111111100000 | |
854 | |
855 // Characters allowed in scheme except for upper case letters. | |
856 static const _schemeLowerTable = const [ | |
857 // LSB MSB | |
858 // | | | |
859 0x0000, // 0x00 - 0x0f 0000000000000000 | |
860 0x0000, // 0x10 - 0x1f 0000000000000000 | |
861 // + -. | |
862 0x6800, // 0x20 - 0x2f 0000000000010110 | |
863 // 0123456789 | |
864 0x03ff, // 0x30 - 0x3f 1111111111000000 | |
865 // | |
866 0x0000, // 0x40 - 0x4f 0000000000000000 | |
867 // | |
868 0x0000, // 0x50 - 0x5f 0000000000000000 | |
869 // abcdefghijklmno | |
870 0xfffe, // 0x60 - 0x6f 0111111111111111 | |
871 // pqrstuvwxyz | |
872 0x07ff]; // 0x70 - 0x7f 1111111111100000 | |
873 | |
874 // Sub delimiter characters combined with unreserved as of RFC 3986. | |
875 // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" | |
876 // / "*" / "+" / "," / ";" / "=" | |
877 // RFC 3986 section 2.3. | |
878 // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" | |
879 static const _subDelimitersTable = const [ | |
880 // LSB MSB | |
881 // | | | |
882 0x0000, // 0x00 - 0x0f 0000000000000000 | |
883 0x0000, // 0x10 - 0x1f 0000000000000000 | |
884 // ! $ &'()*+,-. | |
885 0x7fd2, // 0x20 - 0x2f 0100101111111110 | |
886 // 0123456789 ; = | |
887 0x2bff, // 0x30 - 0x3f 1111111111010100 | |
888 // ABCDEFGHIJKLMNO | |
889 0xfffe, // 0x40 - 0x4f 0111111111111111 | |
890 // PQRSTUVWXYZ _ | |
891 0x87ff, // 0x50 - 0x5f 1111111111100001 | |
892 // abcdefghijklmno | |
893 0xfffe, // 0x60 - 0x6f 0111111111111111 | |
894 // pqrstuvwxyz ~ | |
895 0x47ff]; // 0x70 - 0x7f 1111111111100010 | |
896 | |
897 // Characters allowed in the path as of RFC 3986. | |
898 // RFC 3986 section 3.3. | |
899 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" | |
900 static const _pathCharTable = const [ | |
901 // LSB MSB | |
902 // | | | |
903 0x0000, // 0x00 - 0x0f 0000000000000000 | |
904 0x0000, // 0x10 - 0x1f 0000000000000000 | |
905 // ! $ &'()*+,-. | |
906 0x7fd2, // 0x20 - 0x2f 0100101111111110 | |
907 // 0123456789:; = | |
908 0x2fff, // 0x30 - 0x3f 1111111111110100 | |
909 // @ABCDEFGHIJKLMNO | |
910 0xffff, // 0x40 - 0x4f 1111111111111111 | |
911 // PQRSTUVWXYZ _ | |
912 0x87ff, // 0x50 - 0x5f 1111111111100001 | |
913 // abcdefghijklmno | |
914 0xfffe, // 0x60 - 0x6f 0111111111111111 | |
915 // pqrstuvwxyz ~ | |
916 0x47ff]; // 0x70 - 0x7f 1111111111100010 | |
917 | |
918 // Characters allowed in the query as of RFC 3986. | |
919 // RFC 3986 section 3.4. | |
920 // query = *( pchar / "/" / "?" ) | |
921 static const _queryCharTable = const [ | |
922 // LSB MSB | |
923 // | | | |
924 0x0000, // 0x00 - 0x0f 0000000000000000 | |
925 0x0000, // 0x10 - 0x1f 0000000000000000 | |
926 // ! $ &'()*+,-./ | |
927 0xffd2, // 0x20 - 0x2f 0100101111111111 | |
928 // 0123456789:; = ? | |
929 0xafff, // 0x30 - 0x3f 1111111111110101 | |
930 // @ABCDEFGHIJKLMNO | |
931 0xffff, // 0x40 - 0x4f 1111111111111111 | |
932 // PQRSTUVWXYZ _ | |
933 0x87ff, // 0x50 - 0x5f 1111111111100001 | |
934 // abcdefghijklmno | |
935 0xfffe, // 0x60 - 0x6f 0111111111111111 | |
936 // pqrstuvwxyz ~ | |
937 0x47ff]; // 0x70 - 0x7f 1111111111100010 | |
938 } | |
OLD | NEW |