Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(540)

Side by Side Diff: sdk/lib/core/uri.dart

Issue 337033003: Revert "New, more validating, parser for URI." and follow-up patches. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/bin/builtin.dart ('k') | sdk/lib/io/http_impl.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.core; 5 part of dart.core;
6 6
7 /** 7 /**
8 * A parsed URI, such as a URL. 8 * A parsed URI, such as a URL.
9 * 9 *
10 * **See also:** 10 * **See also:**
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 // segment = *pchar 165 // segment = *pchar
166 // segment-nz = 1*pchar 166 // segment-nz = 1*pchar
167 // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 167 // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
168 // ; non-zero-length segment without any colon ":" 168 // ; non-zero-length segment without any colon ":"
169 // 169 //
170 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 170 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
171 // 171 //
172 // query = *( pchar / "/" / "?" ) 172 // query = *( pchar / "/" / "?" )
173 // 173 //
174 // fragment = *( pchar / "/" / "?" ) 174 // fragment = *( pchar / "/" / "?" )
175 bool isRegName(int ch) {
176 return ch < 128 && ((_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
177 }
175 178
176 int ipV6Address(int index) { 179 int ipV6Address(int index) {
177 // IPv6. Skip to ']'. 180 // IPv6. Skip to ']'.
178 int endIndex = uri.indexOf(']', index); 181 index = uri.indexOf(']', index);
179 if (endIndex < 0) { 182 if (index == -1) {
180 _fail(uri, index - 1, "Unmatched [ in host name"); 183 throw new FormatException("Bad end of IPv6 host");
181 } 184 }
182 return endIndex + 1; 185 return index + 1;
183 } 186 }
184 187
185 int length = uri.length; 188 int length = uri.length;
186 int index = 0; 189 int index = 0;
187 190
188 int schemeEndIndex = 0; 191 int schemeEndIndex = 0;
189 192
190 if (length == 0) { 193 if (length == 0) {
191 return new Uri(); 194 return new Uri();
192 } 195 }
193 // Whether to allow a colon in the first path segment.
194 bool allowColon = false;
195 196
196 if (_isAlphabeticCharacter(uri.codeUnitAt(0))) { 197 if (uri.codeUnitAt(0) != _SLASH) {
197 // Can be scheme. 198 // Can be scheme.
198 while (index < length) { 199 while (index < length) {
199 // Look for ':' to end the scheme. 200 // Look for ':'. If found, continue from the post of ':'. If not (end
200 // If found continue from after ':'. 201 // reached or invalid scheme char found) back up one char, and continue
201 // If not (end reached or invalid scheme char found) back up one char, 202 // to path.
202 // and continue as a path.
203 // Note that scheme-chars is contained in path-chars. 203 // Note that scheme-chars is contained in path-chars.
204 int codeUnit = uri.codeUnitAt(index++); 204 int codeUnit = uri.codeUnitAt(index++);
205 if (!_isSchemeCharacter(codeUnit)) { 205 if (!_isSchemeCharacter(codeUnit)) {
206 if (codeUnit == _COLON) { 206 if (codeUnit == _COLON) {
207 schemeEndIndex = index; 207 schemeEndIndex = index;
208 allowColon = true; // Scheme detected, allow colon in path.
209 } else { 208 } else {
210 // Back up one char, since we met an invalid scheme char. 209 // Back up one char, since we met an invalid scheme char.
211 index--; 210 index--;
212 } 211 }
213 break; 212 break;
214 } 213 }
215 } 214 }
216 } 215 }
217 216
218 int userInfoEndIndex = -1; 217 int userInfoEndIndex = -1;
219 int portIndex = -1; 218 int portIndex = -1;
220 int authorityEndIndex = schemeEndIndex; 219 int authorityEndIndex = schemeEndIndex;
221 // If we see '//', there must be an authority. 220 // If we see '//', there must be an authority.
222 if (authorityEndIndex == index && 221 if (authorityEndIndex == index &&
223 authorityEndIndex + 1 < length && 222 authorityEndIndex + 1 < length &&
224 uri.codeUnitAt(authorityEndIndex) == _SLASH && 223 uri.codeUnitAt(authorityEndIndex) == _SLASH &&
225 uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) { 224 uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) {
226 // Skip '//'. 225 // Skip '//'.
227 allowColon = true; // First slash seen, allow colon in path.
228 authorityEndIndex += 2; 226 authorityEndIndex += 2;
229 // It can both be host and userInfo. 227 // It can both be host and userInfo.
230 while (authorityEndIndex < length) { 228 while (authorityEndIndex < length) {
231 int codeUnit = uri.codeUnitAt(authorityEndIndex++); 229 int codeUnit = uri.codeUnitAt(authorityEndIndex++);
232 if (!_isRegNameChar(codeUnit)) { 230 if (!isRegName(codeUnit)) {
233 if (codeUnit == _LEFT_BRACKET) { 231 if (codeUnit == _LEFT_BRACKET) {
234 authorityEndIndex = ipV6Address(authorityEndIndex); 232 authorityEndIndex = ipV6Address(authorityEndIndex);
235 } else if (portIndex == -1 && codeUnit == _COLON) { 233 } else if (portIndex == -1 && codeUnit == _COLON) {
236 // First time ':'. 234 // First time ':'.
237 portIndex = authorityEndIndex; 235 portIndex = authorityEndIndex;
238 } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) { 236 } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) {
239 // Second time ':' or first '@'. Must be userInfo. 237 // Second time ':' or first '@'. Must be userInfo.
240 if (codeUnit == _AT_SIGN) { 238 userInfoEndIndex = uri.indexOf('@', authorityEndIndex - 1);
241 userInfoEndIndex = authorityEndIndex - 1; 239 // Not found. Must be path then.
242 } else { 240 if (userInfoEndIndex == -1) {
243 userInfoEndIndex = uri.indexOf('@', authorityEndIndex); 241 authorityEndIndex = index;
244 // @ Not found after something that can only be userinfo. 242 break;
245 if (userInfoEndIndex < 0) {
246 _fail(uri, uri.length, "No '@' after userinfo");
247 }
248 } 243 }
249 portIndex = -1; 244 portIndex = -1;
250 authorityEndIndex = userInfoEndIndex + 1; 245 authorityEndIndex = userInfoEndIndex + 1;
251 // Now it can only be host:port. 246 // Now it can only be host:port.
252 while (authorityEndIndex < length) { 247 while (authorityEndIndex < length) {
253 int codeUnit = uri.codeUnitAt(authorityEndIndex++); 248 int codeUnit = uri.codeUnitAt(authorityEndIndex++);
254 if (!_isRegNameChar(codeUnit)) { 249 if (!isRegName(codeUnit)) {
255 if (codeUnit == _LEFT_BRACKET) { 250 if (codeUnit == _LEFT_BRACKET) {
256 authorityEndIndex = ipV6Address(authorityEndIndex); 251 authorityEndIndex = ipV6Address(authorityEndIndex);
257 } else if (codeUnit == _COLON) { 252 } else if (codeUnit == _COLON) {
258 if (portIndex != -1) { 253 if (portIndex != -1) {
259 throw new FormatException("Double port in host"); 254 throw new FormatException("Double port in host");
260 } 255 }
261 portIndex = authorityEndIndex; 256 portIndex = authorityEndIndex;
262 } else { 257 } else {
263 authorityEndIndex--; 258 authorityEndIndex--;
264 break; 259 break;
265 } 260 }
266 } 261 }
267 } 262 }
268 break; 263 break;
269 } else { 264 } else {
270 authorityEndIndex--; 265 authorityEndIndex--;
271 break; 266 break;
272 } 267 }
273 } 268 }
274 } 269 }
275 if (authorityEndIndex < length) {
276 // path-abempty - either absolute or empty, so we need a slash if
277 // there is a path.
278 int codeUnit = uri.codeUnitAt(authorityEndIndex);
279 if (codeUnit != _SLASH &&
280 codeUnit != _QUESTION &&
281 codeUnit != _NUMBER_SIGN) {
282 _fail(uri, authorityEndIndex, "Invalid character in authority");
283 }
284 }
285 } else { 270 } else {
286 authorityEndIndex = schemeEndIndex; 271 authorityEndIndex = schemeEndIndex;
287 } 272 }
288 273
289 // At path now. 274 // At path now.
290 int pathEndIndex = authorityEndIndex; 275 int pathEndIndex = authorityEndIndex;
291 if (!allowColon) {
292 while (pathEndIndex < length) {
293 int codeUnit = uri.codeUnitAt(pathEndIndex++);
294 if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) {
295 pathEndIndex--;
296 break;
297 }
298 if (codeUnit == _SLASH) break;
299 if (codeUnit == _COLON) {
300 _fail(uri, pathEndIndex - 1, "Colon in initial path segment");
301 }
302 }
303 }
304 while (pathEndIndex < length) { 276 while (pathEndIndex < length) {
305 int codeUnit = uri.codeUnitAt(pathEndIndex++); 277 int codeUnit = uri.codeUnitAt(pathEndIndex++);
306 if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) { 278 if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) {
307 pathEndIndex--; 279 pathEndIndex--;
308 break; 280 break;
309 } 281 }
310 } 282 }
311 283
312 // Maybe query. 284 // Maybe query.
313 int queryEndIndex = pathEndIndex; 285 int queryEndIndex = pathEndIndex;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
361 333
362 return new Uri(scheme: scheme, 334 return new Uri(scheme: scheme,
363 userInfo: userInfo, 335 userInfo: userInfo,
364 host: host, 336 host: host,
365 port: port, 337 port: port,
366 path: path, 338 path: path,
367 query: query, 339 query: query,
368 fragment: fragment); 340 fragment: fragment);
369 } 341 }
370 342
371 // Report a parse failure.
372 static void _fail(String uri, int index, String message) {
373 // TODO(lrn): Consider adding this to FormatException.
374 if (index == uri.length) {
375 message += " at end of input.";
376 } else {
377 message += " at position $index.\n";
378 // Pick a slice of uri containing index and, if
379 // necessary, truncate the ends to ensure the entire
380 // slice fits on one line.
381 int min = 0;
382 int max = uri.length;
383 String pre = "";
384 String post = "";
385 if (uri.length > 78) {
386 min = index - 10;
387 if (min < 0) min = 0;
388 int max = min + 72;
389 if (max > uri.length) {
390 max = uri.length;
391 min = max - 72;
392 }
393 if (min != 0) pre = "...";
394 if (max != uri.length) post = "...";
395 }
396 // Combine message, slice and a caret pointing to the error index.
397 message = "$message$pre${uri.substring(min, max)}$post\n"
398 "${' ' * (pre.length + index - min)}^";
399 }
400 throw new FormatException(message);
401 }
402
403 /** 343 /**
404 * Creates a new URI from its components. 344 * Creates a new URI from its components.
405 * 345 *
406 * Each component is set through a named argument. Any number of 346 * Each component is set through a named argument. Any number of
407 * components can be provided. The default value for the components 347 * components can be provided. The default value for the components
408 * not provided is the empty string, except for [port] which has a 348 * not provided is the empty string, except for [port] which has a
409 * default value of 0. The [path] and [query] components can be set 349 * default value of 0. The [path] and [query] components can be set
410 * using two different named arguments. 350 * using two different named arguments.
411 * 351 *
412 * The scheme component is set through [scheme]. The scheme is 352 * The scheme component is set through [scheme]. The scheme is
(...skipping 27 matching lines...) Expand all
440 * expected to be fully percent-encoded and is used in its literal 380 * expected to be fully percent-encoded and is used in its literal
441 * form. When [queryParameters] is used the query is built from the 381 * form. When [queryParameters] is used the query is built from the
442 * provided map. Each key and value in the map is percent-encoded 382 * provided map. Each key and value in the map is percent-encoded
443 * and joined using equal and ampersand characters. The 383 * and joined using equal and ampersand characters. The
444 * percent-encoding of the keys and values encodes all characters 384 * percent-encoding of the keys and values encodes all characters
445 * except for the unreserved characters. 385 * except for the unreserved characters.
446 * 386 *
447 * The fragment component is set through [fragment]. 387 * The fragment component is set through [fragment].
448 */ 388 */
449 Uri({String scheme, 389 Uri({String scheme,
450 String userInfo: "", 390 this.userInfo: "",
451 String host: "", 391 String host: "",
452 port: 0, 392 port: 0,
453 String path, 393 String path,
454 Iterable<String> pathSegments, 394 Iterable<String> pathSegments,
455 String query, 395 String query,
456 Map<String, String> queryParameters, 396 Map<String, String> queryParameters,
457 fragment: ""}) : 397 fragment: ""}) :
458 scheme = _makeScheme(scheme), 398 scheme = _makeScheme(scheme),
459 userInfo = _makeUserInfo(userInfo),
460 _host = _makeHost(host), 399 _host = _makeHost(host),
461 query = _makeQuery(query, queryParameters), 400 query = _makeQuery(query, queryParameters),
462 fragment = _makeFragment(fragment) { 401 fragment = _makeFragment(fragment) {
463 // Perform scheme specific normalization. 402 // Perform scheme specific normalization.
464 if (scheme == "http" && port == 80) { 403 if (scheme == "http" && port == 80) {
465 _port = 0; 404 _port = 0;
466 } else if (scheme == "https" && port == 443) { 405 } else if (scheme == "https" && port == 443) {
467 _port = 0; 406 _port = 0;
468 } else { 407 } else {
469 _port = port; 408 _port = port;
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after
821 */ 760 */
822 Map<String, String> get queryParameters { 761 Map<String, String> get queryParameters {
823 if (_queryParameters == null) { 762 if (_queryParameters == null) {
824 _queryParameters = new UnmodifiableMapView(splitQueryString(query)); 763 _queryParameters = new UnmodifiableMapView(splitQueryString(query));
825 } 764 }
826 return _queryParameters; 765 return _queryParameters;
827 } 766 }
828 767
829 static String _makeHost(String host) { 768 static String _makeHost(String host) {
830 if (host == null || host.isEmpty) return host; 769 if (host == null || host.isEmpty) return host;
831 // Host is an IPv6 address if it starts with '[' or contains a colon.
832 if (host.codeUnitAt(0) == _LEFT_BRACKET) { 770 if (host.codeUnitAt(0) == _LEFT_BRACKET) {
833 if (host.codeUnitAt(host.length - 1) != _RIGHT_BRACKET) { 771 if (host.codeUnitAt(host.length - 1) != _RIGHT_BRACKET) {
834 throw new FormatException('Missing end `]` to match `[` in host'); 772 throw new FormatException('Missing end `]` to match `[` in host');
835 } 773 }
836 parseIPv6Address(host.substring(1, host.length - 1)); 774 parseIPv6Address(host.substring(1, host.length - 1));
837 return host; 775 return host;
838 } 776 }
839 // TODO(lrn): skip if too short to be a valid IPv6 address.
840 for (int i = 0; i < host.length; i++) { 777 for (int i = 0; i < host.length; i++) {
841 if (host.codeUnitAt(i) == _COLON) { 778 if (host.codeUnitAt(i) == _COLON) {
842 parseIPv6Address(host); 779 parseIPv6Address(host);
843 return '[$host]'; 780 return '[$host]';
844 } 781 }
845 } 782 }
846 return _normalizeRegName(host); 783 return host;
847 } 784 }
848 785
849 static bool _isRegNameChar(int char) { 786 static String _makeScheme(String scheme) {
850 return char < 127 && (_regNameTable[char >> 4] & (1 << (char & 0xf))) != 0; 787 bool isSchemeLowerCharacter(int ch) {
851 } 788 return ch < 128 &&
789 ((_schemeLowerTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
790 }
852 791
853 /** 792 if (scheme == null) return "";
854 * Validates and does case- and percent-encoding normalization. 793 bool allLowercase = true;
855 * 794 int length = scheme.length;
856 * The [host] must be an RFC3986 "reg-name". It is converted 795 for (int i = 0; i < length; i++) {
857 * to lower case, and percent escapes are converted to either 796 int codeUnit = scheme.codeUnitAt(i);
858 * lower case unreserved characters or upper case escapes. 797 if (i == 0 && !_isAlphabeticCharacter(codeUnit)) {
859 */ 798 // First code unit must be an alphabetic character.
860 static String _normalizeRegName(String host) { 799 throw new ArgumentError('Illegal scheme: $scheme');
861 StringBuffer buffer; 800 }
862 int sectionStart = 0; 801 if (!isSchemeLowerCharacter(codeUnit)) {
863 int index = 0; 802 if (_isSchemeCharacter(codeUnit)) {
864 // Whether all characters between sectionStart and index are normalized, 803 allLowercase = false;
865 bool isNormalized = true; 804 } else {
866 805 throw new ArgumentError('Illegal scheme: $scheme');
867 while (index < host.length) {
868 int char = host.codeUnitAt(index);
869 if (char == _PERCENT) {
870 // The _regNameTable contains "%", so we check that first.
871 String replacement = _normalizeEscape(host, index, true);
872 if (replacement == null && isNormalized) {
873 index += 3;
874 continue;
875 } 806 }
876 if (buffer == null) buffer = new StringBuffer();
877 String slice = host.substring(sectionStart, index);
878 if (!isNormalized) slice = slice.toLowerCase();
879 buffer.write(slice);
880 if (replacement == null) replacement = host.substring(index, index + 3);
881 buffer.write(replacement);
882 index += 3;
883 sectionStart = index;
884 isNormalized = true;
885 } else if (_isRegNameChar(char)) {
886 if (isNormalized && _UPPER_CASE_A <= char && _UPPER_CASE_Z >= char) {
887 // Put initial slice in buffer and continue in non-normalized mode
888 if (buffer == null) buffer = new StringBuffer();
889 if (sectionStart < index) {
890 buffer.write(host.substring(sectionStart, index));
891 sectionStart = index;
892 }
893 isNormalized = false;
894 }
895 index++;
896 } else {
897 _fail(host, index, "Invalid character");
898 } 807 }
899 } 808 }
900 if (buffer == null) return host;
901 if (sectionStart < host.length) {
902 String slice = host.substring(sectionStart);
903 if (!isNormalized) slice = slice.toLowerCase();
904 buffer.write(slice);
905 }
906 return buffer.toString();
907 }
908 809
909 /**
910 * Validates scheme characters and does case-normalization.
911 *
912 * Schemes are converted to lower case. They cannot contain escapes.
913 */
914 static String _makeScheme(String scheme) {
915 if (scheme == null || scheme.isEmpty) return "";
916 int char = scheme.codeUnitAt(0);
917 if (!_isAlphabeticCharacter(char)) {
918 _fail(scheme, 0, "Non-alphabetic character starting scheme");
919 }
920 bool allLowercase = char > _LOWER_CASE_A;
921 for (int i = 0; i < scheme.length; i++) {
922 int codeUnit = scheme.codeUnitAt(i);
923 if (!_isSchemeCharacter(codeUnit)) {
924 _fail(scheme, i, "Illegal scheme character");
925 }
926 if (_LOWER_CASE_A <= codeUnit && _LOWER_CASE_Z >= codeUnit) {
927 allLowercase = false;
928 }
929 }
930 return allLowercase ? scheme : scheme.toLowerCase(); 810 return allLowercase ? scheme : scheme.toLowerCase();
931 } 811 }
932 812
933 static String _makeUserInfo(String userInfo) {
934 if (userInfo == null) return "null";
935 return _normalize(userInfo, _userinfoTable);
936 }
937
938 static bool _isPathCharacter(int ch) {
939 return ch < 128 && ((_pathCharTable[ch >> 4] & (1 << (ch & 0x0f))) != 0) ||
940 ch == _SLASH;
941 }
942
943 String _makePath(String path, Iterable<String> pathSegments) { 813 String _makePath(String path, Iterable<String> pathSegments) {
944 if (path == null && pathSegments == null) return ""; 814 if (path == null && pathSegments == null) return "";
945 if (path != null && pathSegments != null) { 815 if (path != null && pathSegments != null) {
946 throw new ArgumentError('Both path and pathSegments specified'); 816 throw new ArgumentError('Both path and pathSegments specified');
947 } 817 }
948 // TODO(lrn): Do path normalization to remove /./ and /../ segments.
949 var result; 818 var result;
950 if (path != null) { 819 if (path != null) {
951 result = _normalize(path, _pathCharOrSlashTable); 820 result = _normalize(path);
952 } else { 821 } else {
953 result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/"); 822 result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/");
954 } 823 }
955 if ((hasAuthority || (scheme == "file")) && 824 if ((hasAuthority || (scheme == "file")) &&
956 result.isNotEmpty && !result.startsWith("/")) { 825 result.isNotEmpty && !result.startsWith("/")) {
957 return "/$result"; 826 return "/$result";
958 } 827 }
959 return result; 828 return result;
960 } 829 }
961 830
962 static String _makeQuery(String query, Map<String, String> queryParameters) { 831 static String _makeQuery(String query, Map<String, String> queryParameters) {
963 if (query == null && queryParameters == null) return ""; 832 if (query == null && queryParameters == null) return "";
964 if (query != null && queryParameters != null) { 833 if (query != null && queryParameters != null) {
965 throw new ArgumentError('Both query and queryParameters specified'); 834 throw new ArgumentError('Both query and queryParameters specified');
966 } 835 }
967 if (query != null) return _normalize(query, _queryCharTable); 836 if (query != null) return _normalize(query);
968 837
969 var result = new StringBuffer(); 838 var result = new StringBuffer();
970 var first = true; 839 var first = true;
971 queryParameters.forEach((key, value) { 840 queryParameters.forEach((key, value) {
972 if (!first) { 841 if (!first) {
973 result.write("&"); 842 result.write("&");
974 } 843 }
975 first = false; 844 first = false;
976 result.write(Uri.encodeQueryComponent(key)); 845 result.write(Uri.encodeQueryComponent(key));
977 if (value != null && !value.isEmpty) { 846 if (value != null && !value.isEmpty) {
978 result.write("="); 847 result.write("=");
979 result.write(Uri.encodeQueryComponent(value)); 848 result.write(Uri.encodeQueryComponent(value));
980 } 849 }
981 }); 850 });
982 return result.toString(); 851 return result.toString();
983 } 852 }
984 853
985 static String _makeFragment(String fragment) { 854 static String _makeFragment(String fragment) {
986 if (fragment == null) return ""; 855 if (fragment == null) return "";
987 return _normalize(fragment, _queryCharTable); 856 return _normalize(fragment);
988 } 857 }
989 858
990 static bool _isLowerCaseHexDigit(int digit) { 859 static String _normalize(String component) {
991 return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F; 860 int index = component.indexOf('%');
992 } 861 if (index < 0) return component;
993 862
994 /** Returns whether char is a hex digit. */ 863 bool isNormalizedHexDigit(int digit) {
995 static bool _isHexDigit(int char) { 864 return (_ZERO <= digit && digit <= _NINE) ||
996 if (_NINE >= char) return _ZERO <= char; 865 (_UPPER_CASE_A <= digit && digit <= _UPPER_CASE_F);
997 char |= 0x20; 866 }
998 return _LOWER_CASE_A <= char && _LOWER_CASE_F >= char;
999 }
1000 867
1001 /** Returns value of char as hex digit. */ 868 bool isLowerCaseHexDigit(int digit) {
1002 static int _hexValue(int digit) { 869 return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F;
1003 assert(_isHexDigit(digit)); 870 }
1004 if (_NINE >= digit) return digit - _ZERO;
1005 return (digit | 0x20) - (_LOWER_CASE_A - 10);
1006 }
1007 871
1008 /** 872 bool isUnreserved(int ch) {
1009 * Performs RFC 3986 Percent-Encoding Normalization. 873 return ch < 128 &&
1010 * 874 ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
1011 * Returns a replacement string that should be replace the original escape.
1011 * Returns a replacement string that should replace the original escape.
1013 * not for an unreserved character and is already non-lower-case.
1014 *
1015 * If [lowerCase] is true, a single character returned is always lower case,
1016 */
1017 static String _normalizeEscape(String source, int index, bool lowerCase) {
1018 assert(source.codeUnitAt(index) == _PERCENT);
1019 if (index + 2 >= source.length) {
1020 _fail(source, index, "Unterminated percent escape");
1021 } 875 }
1022 int firstDigit = source.codeUnitAt(index + 1);
1023 int secondDigit = source.codeUnitAt(index + 2);
1024 if (!_isHexDigit(firstDigit) || !_isHexDigit(secondDigit)) {
1025 _fail(source, index, "Invalid escape");
1026 }
1027 int value = _hexValue(firstDigit) * 16 + _hexValue(secondDigit);
1028 if (_isUnreservedChar(value)) {
1029 if (lowerCase && _UPPER_CASE_A <= value && _UPPER_CASE_Z >= value) {
1030 value |= 0x20;
1031 }
1032 return new String.fromCharCode(value);
1033 }
1034 if (firstDigit >= _LOWER_CASE_A || secondDigit >= _LOWER_CASE_A) {
1035 // Either digit is lower case.
1036 return source.substring(index, index + 3).toUpperCase();
1037 }
1038 return null;
1039 }
1040 876
1041 static bool _isUnreservedChar(int ch) { 877 int normalizeHexDigit(int index) {
1042 return ch < 127 && 878 var codeUnit = component.codeUnitAt(index);
1043 ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); 879 if (isLowerCaseHexDigit(codeUnit)) {
1044 } 880 return codeUnit - 0x20;
1045 881 } else if (!isNormalizedHexDigit(codeUnit)) {
1046 882 throw new ArgumentError("Invalid URI component: $component");
1047 /**
1048 * Runs through component checking that each character is valid and
1049 * normalize percent escapes.
1050 *
1051 * Uses [charTable] to check if a non-`%` character is allowed.
1052 * Each `%` character must be followed by two hex digits.
1053 * If the hex-digits are lower case letters, they are converted to
1054 * upper case.
1055 */
1056 static String _normalize(String component, List<int> charTable) {
1057 StringBuffer buffer;
1058 int sectionStart = 0;
1059 int index = 0;
1060 // Loop while characters are valid and escapes correct and upper-case.
1061 while (index < component.length) {
1062 int char = component.codeUnitAt(index);
1063 if (char < 127 && (charTable[char >> 4] & (1 << (char & 0x0f))) != 0) {
1064 index++;
1065 } else if (char == _PERCENT) {
1066 String replacement = _normalizeEscape(component, index, false);
1067 if (replacement == null) {
1068 // _normalizeEscape returns null if no replacement necessary.
1069 index += 3;
1070 continue;
1071 } else {
1072 if (buffer == null) buffer = new StringBuffer();
1073 buffer.write(component.substring(sectionStart, index));
1074 buffer.write(replacement);
1075 index += 3;
1076 sectionStart = index;
1077 }
1078 } else { 883 } else {
1079 _fail(component, index, "Invalid character"); 884 return codeUnit;
1080 } 885 }
1081 } 886 }
1082 if (buffer == null) return component; 887
1083 if (sectionStart < component.length) { 888 int decodeHexDigitPair(int index) {
1084 buffer.write(component.substring(sectionStart)); 889 int byte = 0;
890 for (int i = 0; i < 2; i++) {
891 var codeUnit = component.codeUnitAt(index + i);
892 if (_ZERO <= codeUnit && codeUnit <= _NINE) {
893 byte = byte * 16 + codeUnit - _ZERO;
894 } else {
895 // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66).
896 codeUnit |= 0x20;
897 if (_LOWER_CASE_A <= codeUnit &&
898 codeUnit <= _LOWER_CASE_F) {
899 byte = byte * 16 + codeUnit - _LOWER_CASE_A + 10;
900 } else {
901 throw new ArgumentError(
902 "Invalid percent-encoding in URI component: $component");
903 }
904 }
905 }
906 return byte;
1085 } 907 }
1086 return buffer.toString(); 908
909 // Start building the normalized component string.
910 StringBuffer result;
911 int length = component.length;
912 int prevIndex = 0;
913
914 // Copy a part of the component string to the result.
915 void fillResult() {
916 if (result == null) {
917 assert(prevIndex == 0);
918 result = new StringBuffer(component.substring(prevIndex, index));
919 } else {
920 result.write(component.substring(prevIndex, index));
921 }
922 }
923
924 while (index < length) {
925 // Normalize percent-encoding to uppercase and don't encode
926 // unreserved characters.
927 assert(component.codeUnitAt(index) == _PERCENT);
928 if (length < index + 2) {
929 throw new ArgumentError(
930 "Invalid percent-encoding in URI component: $component");
931 }
932
933 var codeUnit1 = component.codeUnitAt(index + 1);
934 var codeUnit2 = component.codeUnitAt(index + 2);
935 var decodedCodeUnit = decodeHexDigitPair(index + 1);
936 if (isNormalizedHexDigit(codeUnit1) &&
937 isNormalizedHexDigit(codeUnit2) &&
938 !isUnreserved(decodedCodeUnit)) {
939 index += 3;
940 } else {
941 fillResult();
942 if (isUnreserved(decodedCodeUnit)) {
943 result.writeCharCode(decodedCodeUnit);
944 } else {
945 result.write("%");
946 result.writeCharCode(normalizeHexDigit(index + 1));
947 result.writeCharCode(normalizeHexDigit(index + 2));
948 }
949 index += 3;
950 prevIndex = index;
951 }
952 int next = component.indexOf('%', index);
953 if (next >= index) {
954 index = next;
955 } else {
956 index = length;
957 }
958 }
959 if (result == null) return component;
960
961 if (result != null && prevIndex != index) fillResult();
962 assert(index == length);
963
964 return result.toString();
1087 } 965 }
1088 966
1089 static bool _isSchemeCharacter(int ch) { 967 static bool _isSchemeCharacter(int ch) {
1090 return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); 968 return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
1091 } 969 }
1092 970
971
1093 /** 972 /**
1094 * Returns whether the URI is absolute. 973 * Returns whether the URI is absolute.
1095 */ 974 */
1096 bool get isAbsolute => scheme != "" && fragment == ""; 975 bool get isAbsolute => scheme != "" && fragment == "";
1097 976
1098 String _merge(String base, String reference) { 977 String _merge(String base, String reference) {
1099 if (base == "") return "/$reference"; 978 if (base == "") return "/$reference";
1100 return "${base.substring(0, base.lastIndexOf("/") + 1)}$reference"; 979 return "${base.substring(0, base.lastIndexOf("/") + 1)}$reference";
1101 } 980 }
1102 981
(...skipping 606 matching lines...) Expand 10 before | Expand all | Expand 10 after
1709 1588
1710 /** 1589 /**
1711 * This is the internal implementation of JavaScript's encodeURI function. 1590 * This is the internal implementation of JavaScript's encodeURI function.
1712 * It encodes all characters in the string [text] except for those 1591 * It encodes all characters in the string [text] except for those
1713 * that appear in [canonicalTable], and returns the escaped string. 1592 * that appear in [canonicalTable], and returns the escaped string.
1714 */ 1593 */
1715 static String _uriEncode(List<int> canonicalTable, 1594 static String _uriEncode(List<int> canonicalTable,
1716 String text, 1595 String text,
1717 {Encoding encoding: UTF8, 1596 {Encoding encoding: UTF8,
1718 bool spaceToPlus: false}) { 1597 bool spaceToPlus: false}) {
1719 void byteToHex(byte, buffer) { 1598 byteToHex(byte, buffer) {
1720 const String hex = '0123456789ABCDEF'; 1599 const String hex = '0123456789ABCDEF';
1721 buffer.writeCharCode(hex.codeUnitAt(byte >> 4)); 1600 buffer.writeCharCode(hex.codeUnitAt(byte >> 4));
1722 buffer.writeCharCode(hex.codeUnitAt(byte & 0x0f)); 1601 buffer.writeCharCode(hex.codeUnitAt(byte & 0x0f));
1723 } 1602 }
1724 1603
1725 // Encode the string into bytes then generate an ASCII only string 1604 // Encode the string into bytes then generate an ASCII only string
1726 // by percent encoding selected bytes. 1605 // by percent encoding selected bytes.
1727 StringBuffer result = new StringBuffer(); 1606 StringBuffer result = new StringBuffer();
1728 var bytes = encoding.encode(text); 1607 var bytes = encoding.encode(text);
1729 for (int i = 0; i < bytes.length; i++) { 1608 for (int i = 0; i < bytes.length; i++) {
1730 int byte = bytes[i]; 1609 int byte = bytes[i];
1731 if (byte < 128) { 1610 if (byte < 128 &&
1732 if ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0) { 1611 ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) {
1733 result.writeCharCode(byte); 1612 result.writeCharCode(byte);
1734 continue; 1613 } else if (spaceToPlus && byte == _SPACE) {
1735 } 1614 result.writeCharCode(_PLUS);
1736 if (spaceToPlus && byte == _SPACE) { 1615 } else {
1737 result.writeCharCode(_PLUS); 1616 result.writeCharCode(_PERCENT);
1738 continue; 1617 byteToHex(byte, result);
1739 }
1740 } 1618 }
1741 result.writeCharCode(_PERCENT);
1742 byteToHex(byte, result);
1743 } 1619 }
1744 return result.toString(); 1620 return result.toString();
1745 } 1621 }
1746 1622
1747 /** 1623 /**
1748 * Convert a byte (2 character hex sequence) in string [s] starting 1624 * Convert a byte (2 character hex sequence) in string [s] starting
1749 * at position [pos] to its ordinal value 1625 * at position [pos] to its ordinal value
1750 */ 1626 */
1751 static int _hexCharPairToByte(String s, int pos) { 1627 static int _hexCharPairToByte(String s, int pos) {
1752 int byte = 0; 1628 int byte = 0;
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
1958 0x2bff, // 0x30 - 0x3f 1111111111010100 1834 0x2bff, // 0x30 - 0x3f 1111111111010100
1959 // ABCDEFGHIJKLMNO 1835 // ABCDEFGHIJKLMNO
1960 0xfffe, // 0x40 - 0x4f 0111111111111111 1836 0xfffe, // 0x40 - 0x4f 0111111111111111
1961 // PQRSTUVWXYZ _ 1837 // PQRSTUVWXYZ _
1962 0x87ff, // 0x50 - 0x5f 1111111111100001 1838 0x87ff, // 0x50 - 0x5f 1111111111100001
1963 // abcdefghijklmno 1839 // abcdefghijklmno
1964 0xfffe, // 0x60 - 0x6f 0111111111111111 1840 0xfffe, // 0x60 - 0x6f 0111111111111111
1965 // pqrstuvwxyz ~ 1841 // pqrstuvwxyz ~
1966 0x47ff]; // 0x70 - 0x7f 1111111111100010 1842 0x47ff]; // 0x70 - 0x7f 1111111111100010
1967 1843
1968 // Characters allowed in the userinfo as of RFC 3986.
1969 // RFC 3986 Appendix A
1970 // userinfo = *( unreserved / pct-encoded / sub-delims / ':')
1971 static const _userinfoTable = const [
1972 // LSB MSB
1973 // | |
1974 0x0000, // 0x00 - 0x0f 0000000000000000
1975 0x0000, // 0x10 - 0x1f 0000000000000000
1976 // ! $ &'()*+,-.
1977 0x7fd2, // 0x20 - 0x2f 0100101111111110
1978 // 0123456789:; =
1979 0x2fff, // 0x30 - 0x3f 1111111111110100
1980 // ABCDEFGHIJKLMNO
1981 0xfffe, // 0x40 - 0x4f 0111111111111111
1982 // PQRSTUVWXYZ _
1983 0x87ff, // 0x50 - 0x5f 1111111111100001
1984 // abcdefghijklmno
1985 0xfffe, // 0x60 - 0x6f 0111111111111111
1986 // pqrstuvwxyz ~
1987 0x47ff]; // 0x70 - 0x7f 1111111111100010
1988
1989 // Characters allowed in the path as of RFC 3986. 1844 // Characters allowed in the path as of RFC 3986.
1990 // RFC 3986 section 3.3. 1845 // RFC 3986 section 3.3.
1991 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 1846 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
1992 static const _pathCharTable = const [ 1847 static const _pathCharTable = const [
1993 // LSB MSB 1848 // LSB MSB
1994 // | | 1849 // | |
1995 0x0000, // 0x00 - 0x0f 0000000000000000 1850 0x0000, // 0x00 - 0x0f 0000000000000000
1996 0x0000, // 0x10 - 0x1f 0000000000000000 1851 0x0000, // 0x10 - 0x1f 0000000000000000
1997 // ! $ &'()*+,-. 1852 // ! $ &'()*+,-.
1998 0x7fd2, // 0x20 - 0x2f 0100101111111110 1853 0x7fd2, // 0x20 - 0x2f 0100101111111110
1999 // 0123456789:; = 1854 // 0123456789:; =
2000 0x2fff, // 0x30 - 0x3f 1111111111110100 1855 0x2fff, // 0x30 - 0x3f 1111111111110100
2001 // @ABCDEFGHIJKLMNO 1856 // @ABCDEFGHIJKLMNO
2002 0xffff, // 0x40 - 0x4f 1111111111111111 1857 0xffff, // 0x40 - 0x4f 1111111111111111
2003 // PQRSTUVWXYZ _ 1858 // PQRSTUVWXYZ _
2004 0x87ff, // 0x50 - 0x5f 1111111111100001 1859 0x87ff, // 0x50 - 0x5f 1111111111100001
2005 // abcdefghijklmno 1860 // abcdefghijklmno
2006 0xfffe, // 0x60 - 0x6f 0111111111111111 1861 0xfffe, // 0x60 - 0x6f 0111111111111111
2007 // pqrstuvwxyz ~ 1862 // pqrstuvwxyz ~
2008 0x47ff]; // 0x70 - 0x7f 1111111111100010 1863 0x47ff]; // 0x70 - 0x7f 1111111111100010
2009 1864
2010 // Characters allowed in the path as of RFC 3986.
2011 // RFC 3986 section 3.3 *and* slash.
2012 static const _pathCharOrSlashTable = const [
2013 // LSB MSB
2014 // | |
2015 0x0000, // 0x00 - 0x0f 0000000000000000
2016 0x0000, // 0x10 - 0x1f 0000000000000000
2017 // ! $ &'()*+,-./
2018 0xffd2, // 0x20 - 0x2f 0100101111111111
2019 // 0123456789:; =
2020 0x2fff, // 0x30 - 0x3f 1111111111110100
2021 // @ABCDEFGHIJKLMNO
2022 0xffff, // 0x40 - 0x4f 1111111111111111
2023 // PQRSTUVWXYZ _
2024 0x87ff, // 0x50 - 0x5f 1111111111100001
2025 // abcdefghijklmno
2026 0xfffe, // 0x60 - 0x6f 0111111111111111
2027 // pqrstuvwxyz ~
2028 0x47ff]; // 0x70 - 0x7f 1111111111100010
2029
2030 // Characters allowed in the query as of RFC 3986. 1865 // Characters allowed in the query as of RFC 3986.
2031 // RFC 3986 section 3.4. 1866 // RFC 3986 section 3.4.
2032 // query = *( pchar / "/" / "?" ) 1867 // query = *( pchar / "/" / "?" )
2033 static const _queryCharTable = const [ 1868 static const _queryCharTable = const [
2034 // LSB MSB 1869 // LSB MSB
2035 // | | 1870 // | |
2036 0x0000, // 0x00 - 0x0f 0000000000000000 1871 0x0000, // 0x00 - 0x0f 0000000000000000
2037 0x0000, // 0x10 - 0x1f 0000000000000000 1872 0x0000, // 0x10 - 0x1f 0000000000000000
2038 // ! $ &'()*+,-./ 1873 // ! $ &'()*+,-./
2039 0xffd2, // 0x20 - 0x2f 0100101111111111 1874 0xffd2, // 0x20 - 0x2f 0100101111111111
2040 // 0123456789:; = ? 1875 // 0123456789:; = ?
2041 0xafff, // 0x30 - 0x3f 1111111111110101 1876 0xafff, // 0x30 - 0x3f 1111111111110101
2042 // @ABCDEFGHIJKLMNO 1877 // @ABCDEFGHIJKLMNO
2043 0xffff, // 0x40 - 0x4f 1111111111111111 1878 0xffff, // 0x40 - 0x4f 1111111111111111
2044 // PQRSTUVWXYZ _ 1879 // PQRSTUVWXYZ _
2045 0x87ff, // 0x50 - 0x5f 1111111111100001 1880 0x87ff, // 0x50 - 0x5f 1111111111100001
2046 // abcdefghijklmno 1881 // abcdefghijklmno
2047 0xfffe, // 0x60 - 0x6f 0111111111111111 1882 0xfffe, // 0x60 - 0x6f 0111111111111111
2048 // pqrstuvwxyz ~ 1883 // pqrstuvwxyz ~
2049 0x47ff]; // 0x70 - 0x7f 1111111111100010 1884 0x47ff]; // 0x70 - 0x7f 1111111111100010
2050 } 1885 }
OLDNEW
« no previous file with comments | « runtime/bin/builtin.dart ('k') | sdk/lib/io/http_impl.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698