Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(64)

Side by Side Diff: sdk/lib/core/uri.dart

Issue 1071573002: Add start/end to Uri.parse, allowing you to parse a substring without creating a new String object. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Add test, make it work. Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tests/corelib/uri_parse_test.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.core; 5 part of dart.core;
6 6
7 /** 7 /**
8 * A parsed URI, such as a URL. 8 * A parsed URI, such as a URL.
9 * 9 *
10 * **See also:** 10 * **See also:**
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after
139 List<String> _pathSegments; 139 List<String> _pathSegments;
140 140
141 /** 141 /**
142 * Cache the computed return value of [queryParameters]. 142 * Cache the computed return value of [queryParameters].
143 */ 143 */
144 Map<String, String> _queryParameters; 144 Map<String, String> _queryParameters;
145 145
146 /** 146 /**
147 * Creates a new `Uri` object by parsing a URI string. 147 * Creates a new `Uri` object by parsing a URI string.
148 * 148 *
149 * If [start] and [end] are provided, only the substring from `start`
150 * to `end` is parsed as a URI.
151 *
149 * If the string is not valid as a URI or URI reference, 152 * If the string is not valid as a URI or URI reference,
150 * invalid characters will be percent escaped where possible. 153 * invalid characters will be percent escaped where possible.
151 * The resulting `Uri` will represent a valid URI or URI reference. 154 * The resulting `Uri` will represent a valid URI or URI reference.
152 */ 155 */
153 static Uri parse(String uri) { 156 static Uri parse(String uri, [int start = 0, int end]) {
154 // This parsing will not validate percent-encoding, IPv6, etc. When done 157 // This parsing will not validate percent-encoding, IPv6, etc. When done
155 // it will call `new Uri(...)` which will perform these validations. 158 // it will call `new Uri(...)` which will perform these validations.
156 // This is purely splitting up the URI string into components. 159 // This is purely splitting up the URI string into components.
157 // 160 //
158 // Important parts of the RFC 3986 used here: 161 // Important parts of the RFC 3986 used here:
159 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] 162 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
160 // 163 //
161 // hier-part = "//" authority path-abempty 164 // hier-part = "//" authority path-abempty
162 // / path-absolute 165 // / path-absolute
163 // / path-rootless 166 // / path-rootless
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
209 } 212 }
210 const int EOI = -1; 213 const int EOI = -1;
211 214
212 String scheme = ""; 215 String scheme = "";
213 String userinfo = ""; 216 String userinfo = "";
214 String host = null; 217 String host = null;
215 int port = null; 218 int port = null;
216 String path = null; 219 String path = null;
217 String query = null; 220 String query = null;
218 String fragment = null; 221 String fragment = null;
222 if (end == null) end = uri.length;
219 223
220 int index = 0; 224 int index = start;
221 int pathStart = 0; 225 int pathStart = start;
222 // End of input-marker. 226 // End of input-marker.
223 int char = EOI; 227 int char = EOI;
224 228
225 void parseAuth() { 229 void parseAuth() {
226 if (index == uri.length) { 230 if (index == end) {
227 char = EOI; 231 char = EOI;
228 return; 232 return;
229 } 233 }
230 int authStart = index; 234 int authStart = index;
231 int lastColon = -1; 235 int lastColon = -1;
232 int lastAt = -1; 236 int lastAt = -1;
233 char = uri.codeUnitAt(index); 237 char = uri.codeUnitAt(index);
234 while (index < uri.length) { 238 while (index < end) {
235 char = uri.codeUnitAt(index); 239 char = uri.codeUnitAt(index);
236 if (char == _SLASH || char == _QUESTION || char == _NUMBER_SIGN) { 240 if (char == _SLASH || char == _QUESTION || char == _NUMBER_SIGN) {
237 break; 241 break;
238 } 242 }
239 if (char == _AT_SIGN) { 243 if (char == _AT_SIGN) {
240 lastAt = index; 244 lastAt = index;
241 lastColon = -1; 245 lastColon = -1;
242 } else if (char == _COLON) { 246 } else if (char == _COLON) {
243 lastColon = index; 247 lastColon = index;
244 } else if (char == _LEFT_BRACKET) { 248 } else if (char == _LEFT_BRACKET) {
245 lastColon = -1; 249 lastColon = -1;
246 int endBracket = uri.indexOf(']', index + 1); 250 int endBracket = uri.indexOf(']', index + 1);
247 if (endBracket == -1) { 251 if (endBracket == -1) {
248 index = uri.length; 252 index = end;
249 char = EOI; 253 char = EOI;
250 break; 254 break;
251 } else { 255 } else {
252 index = endBracket; 256 index = endBracket;
253 } 257 }
254 } 258 }
255 index++; 259 index++;
256 char = EOI; 260 char = EOI;
257 } 261 }
258 int hostStart = authStart; 262 int hostStart = authStart;
(...skipping 11 matching lines...) Expand all
270 if (_ZERO > digit || _NINE < digit) { 274 if (_ZERO > digit || _NINE < digit) {
271 _fail(uri, i, "Invalid port number"); 275 _fail(uri, i, "Invalid port number");
272 } 276 }
273 portNumber = portNumber * 10 + (digit - _ZERO); 277 portNumber = portNumber * 10 + (digit - _ZERO);
274 } 278 }
275 } 279 }
276 port = _makePort(portNumber, scheme); 280 port = _makePort(portNumber, scheme);
277 hostEnd = lastColon; 281 hostEnd = lastColon;
278 } 282 }
279 host = _makeHost(uri, hostStart, hostEnd, true); 283 host = _makeHost(uri, hostStart, hostEnd, true);
280 if (index < uri.length) { 284 if (index < end) {
281 char = uri.codeUnitAt(index); 285 char = uri.codeUnitAt(index);
282 } 286 }
283 } 287 }
284 288
285 // When reaching path parsing, the current character is known to not 289 // When reaching path parsing, the current character is known to not
286 // be part of the path. 290 // be part of the path.
287 const int NOT_IN_PATH = 0; 291 const int NOT_IN_PATH = 0;
288 // When reaching path parsing, the current character is part 292 // When reaching path parsing, the current character is part
289 // of a non-empty path. 293 // of a non-empty path.
290 const int IN_PATH = 1; 294 const int IN_PATH = 1;
291 // When reaching authority parsing, authority is possible. 295 // When reaching authority parsing, authority is possible.
292 // This is only true at start or right after scheme. 296 // This is only true at start or right after scheme.
293 const int ALLOW_AUTH = 2; 297 const int ALLOW_AUTH = 2;
294 298
295 // Current state. 299 // Current state.
296 // Initialized to the default value that is used when exiting the 300 // Initialized to the default value that is used when exiting the
297 // scheme loop by reaching the end of input. 301 // scheme loop by reaching the end of input.
298 // All other breaks set their own state. 302 // All other breaks set their own state.
299 int state = NOT_IN_PATH; 303 int state = NOT_IN_PATH;
300 int i = index; // Temporary alias for index to avoid bug 19550 in dart2js. 304 int i = index; // Temporary alias for index to avoid bug 19550 in dart2js.
301 while (i < uri.length) { 305 while (i < end) {
302 char = uri.codeUnitAt(i); 306 char = uri.codeUnitAt(i);
303 if (char == _QUESTION || char == _NUMBER_SIGN) { 307 if (char == _QUESTION || char == _NUMBER_SIGN) {
304 state = NOT_IN_PATH; 308 state = NOT_IN_PATH;
305 break; 309 break;
306 } 310 }
307 if (char == _SLASH) { 311 if (char == _SLASH) {
308 state = (i == 0) ? ALLOW_AUTH : IN_PATH; 312 state = (i == start) ? ALLOW_AUTH : IN_PATH;
309 break; 313 break;
310 } 314 }
311 if (char == _COLON) { 315 if (char == _COLON) {
312 if (i == 0) _fail(uri, 0, "Invalid empty scheme"); 316 if (i == start) _fail(uri, start, "Invalid empty scheme");
313 scheme = _makeScheme(uri, i); 317 scheme = _makeScheme(uri, start, i);
314 i++; 318 i++;
315 pathStart = i; 319 pathStart = i;
316 if (i == uri.length) { 320 if (i == end) {
317 char = EOI; 321 char = EOI;
318 state = NOT_IN_PATH; 322 state = NOT_IN_PATH;
319 } else { 323 } else {
320 char = uri.codeUnitAt(i); 324 char = uri.codeUnitAt(i);
321 if (char == _QUESTION || char == _NUMBER_SIGN) { 325 if (char == _QUESTION || char == _NUMBER_SIGN) {
322 state = NOT_IN_PATH; 326 state = NOT_IN_PATH;
323 } else if (char == _SLASH) { 327 } else if (char == _SLASH) {
324 state = ALLOW_AUTH; 328 state = ALLOW_AUTH;
325 } else { 329 } else {
326 state = IN_PATH; 330 state = IN_PATH;
327 } 331 }
328 } 332 }
329 break; 333 break;
330 } 334 }
331 i++; 335 i++;
332 char = EOI; 336 char = EOI;
333 } 337 }
334 index = i; // Remove alias when bug is fixed. 338 index = i; // Remove alias when bug is fixed.
335 339
336 if (state == ALLOW_AUTH) { 340 if (state == ALLOW_AUTH) {
337 assert(char == _SLASH); 341 assert(char == _SLASH);
338 // Have seen one slash either at start or right after scheme. 342 // Have seen one slash either at start or right after scheme.
339 // If two slashes, it's an authority, otherwise it's just the path. 343 // If two slashes, it's an authority, otherwise it's just the path.
340 index++; 344 index++;
341 if (index == uri.length) { 345 if (index == end) {
342 char = EOI; 346 char = EOI;
343 state = NOT_IN_PATH; 347 state = NOT_IN_PATH;
344 } else { 348 } else {
345 char = uri.codeUnitAt(index); 349 char = uri.codeUnitAt(index);
346 if (char == _SLASH) { 350 if (char == _SLASH) {
347 index++; 351 index++;
348 parseAuth(); 352 parseAuth();
349 pathStart = index; 353 pathStart = index;
350 } 354 }
351 if (char == _QUESTION || char == _NUMBER_SIGN || char == EOI) { 355 if (char == _QUESTION || char == _NUMBER_SIGN || char == EOI) {
352 state = NOT_IN_PATH; 356 state = NOT_IN_PATH;
353 } else { 357 } else {
354 state = IN_PATH; 358 state = IN_PATH;
355 } 359 }
356 } 360 }
357 } 361 }
358 362
359 assert(state == IN_PATH || state == NOT_IN_PATH); 363 assert(state == IN_PATH || state == NOT_IN_PATH);
360 if (state == IN_PATH) { 364 if (state == IN_PATH) {
361 // Characters from pathStart to index (inclusive) are known 365 // Characters from pathStart to index (inclusive) are known
362 // to be part of the path. 366 // to be part of the path.
363 while (++index < uri.length) { 367 while (++index < end) {
364 char = uri.codeUnitAt(index); 368 char = uri.codeUnitAt(index);
365 if (char == _QUESTION || char == _NUMBER_SIGN) { 369 if (char == _QUESTION || char == _NUMBER_SIGN) {
366 break; 370 break;
367 } 371 }
368 char = EOI; 372 char = EOI;
369 } 373 }
370 state = NOT_IN_PATH; 374 state = NOT_IN_PATH;
371 } 375 }
372 376
373 assert(state == NOT_IN_PATH); 377 assert(state == NOT_IN_PATH);
374 bool isFile = (scheme == "file"); 378 bool isFile = (scheme == "file");
375 bool ensureLeadingSlash = host != null; 379 bool ensureLeadingSlash = host != null;
376 path = _makePath(uri, pathStart, index, null, ensureLeadingSlash, isFile); 380 path = _makePath(uri, pathStart, index, null, ensureLeadingSlash, isFile);
377 381
378 if (char == _QUESTION) { 382 if (char == _QUESTION) {
379 int numberSignIndex = uri.indexOf('#', index + 1); 383 int numberSignIndex = -1;
Søren Gjesse (2015/04/09 07:04:42): We could add an optional end to indexOf as well...
Lasse Reichstein Nielsen (2015/04/09 07:52:03): I actually thought about that, but it doesn't work
384 for (int i = index + 1; i < end; i++) {
385 if (uri.codeUnitAt(i) == _NUMBER_SIGN) {
386 numberSignIndex = i;
387 break;
388 }
389 }
380 if (numberSignIndex < 0) { 390 if (numberSignIndex < 0) {
381 query = _makeQuery(uri, index + 1, uri.length, null); 391 query = _makeQuery(uri, index + 1, end, null);
382 } else { 392 } else {
383 query = _makeQuery(uri, index + 1, numberSignIndex, null); 393 query = _makeQuery(uri, index + 1, numberSignIndex, null);
384 fragment = _makeFragment(uri, numberSignIndex + 1, uri.length); 394 fragment = _makeFragment(uri, numberSignIndex + 1, end);
385 } 395 }
386 } else if (char == _NUMBER_SIGN) { 396 } else if (char == _NUMBER_SIGN) {
387 fragment = _makeFragment(uri, index + 1, uri.length); 397 fragment = _makeFragment(uri, index + 1, end);
388 } 398 }
389 return new Uri._internal(scheme, 399 return new Uri._internal(scheme,
390 userinfo, 400 userinfo,
391 host, 401 host,
392 port, 402 port,
393 path, 403 path,
394 query, 404 query,
395 fragment); 405 fragment);
396 } 406 }
397 407
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
475 */ 485 */
476 factory Uri({String scheme : "", 486 factory Uri({String scheme : "",
477 String userInfo : "", 487 String userInfo : "",
478 String host, 488 String host,
479 int port, 489 int port,
480 String path, 490 String path,
481 Iterable<String> pathSegments, 491 Iterable<String> pathSegments,
482 String query, 492 String query,
483 Map<String, String> queryParameters, 493 Map<String, String> queryParameters,
484 String fragment}) { 494 String fragment}) {
485 scheme = _makeScheme(scheme, _stringOrNullLength(scheme)); 495 scheme = _makeScheme(scheme, 0, _stringOrNullLength(scheme));
486 userInfo = _makeUserInfo(userInfo, 0, _stringOrNullLength(userInfo)); 496 userInfo = _makeUserInfo(userInfo, 0, _stringOrNullLength(userInfo));
487 host = _makeHost(host, 0, _stringOrNullLength(host), false); 497 host = _makeHost(host, 0, _stringOrNullLength(host), false);
488 // Special case this constructor for backwards compatibility. 498 // Special case this constructor for backwards compatibility.
489 if (query == "") query = null; 499 if (query == "") query = null;
490 query = _makeQuery(query, 0, _stringOrNullLength(query), queryParameters); 500 query = _makeQuery(query, 0, _stringOrNullLength(query), queryParameters);
491 fragment = _makeFragment(fragment, 0, _stringOrNullLength(fragment)); 501 fragment = _makeFragment(fragment, 0, _stringOrNullLength(fragment));
492 port = _makePort(port, scheme); 502 port = _makePort(port, scheme);
493 bool isFile = (scheme == "file"); 503 bool isFile = (scheme == "file");
494 if (host == null && 504 if (host == null &&
495 (userInfo.isNotEmpty || port != null || isFile)) { 505 (userInfo.isNotEmpty || port != null || isFile)) {
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
561 var host = null; 571 var host = null;
562 var port = null; 572 var port = null;
563 573
564 if (authority != null && authority.isNotEmpty) { 574 if (authority != null && authority.isNotEmpty) {
565 var hostStart = 0; 575 var hostStart = 0;
566 // Split off the user info. 576 // Split off the user info.
567 bool hasUserInfo = false; 577 bool hasUserInfo = false;
568 for (int i = 0; i < authority.length; i++) { 578 for (int i = 0; i < authority.length; i++) {
569 if (authority.codeUnitAt(i) == _AT_SIGN) { 579 if (authority.codeUnitAt(i) == _AT_SIGN) {
570 hasUserInfo = true; 580 hasUserInfo = true;
571 userInfo = authority.substring(0, i); 581 userInfo = authority.substring(start, i);
572 hostStart = i + 1; 582 hostStart = i + 1;
573 break; 583 break;
574 } 584 }
575 } 585 }
576 var hostEnd = hostStart; 586 var hostEnd = hostStart;
577 if (hostStart < authority.length && 587 if (hostStart < authority.length &&
578 authority.codeUnitAt(hostStart) == _LEFT_BRACKET) { 588 authority.codeUnitAt(hostStart) == _LEFT_BRACKET) {
579 // IPv6 host. 589 // IPv6 host.
580 for (; hostEnd < authority.length; hostEnd++) { 590 for (; hostEnd < authority.length; hostEnd++) {
581 if (authority.codeUnitAt(hostEnd) == _RIGHT_BRACKET) break; 591 if (authority.codeUnitAt(hostEnd) == _RIGHT_BRACKET) break;
(...skipping 283 matching lines...) Expand 10 before | Expand all | Expand 10 after
865 String path, 875 String path,
866 Iterable<String> pathSegments, 876 Iterable<String> pathSegments,
867 String query, 877 String query,
868 Map<String, String> queryParameters, 878 Map<String, String> queryParameters,
869 String fragment}) { 879 String fragment}) {
870 // Set to true if the scheme has (potentially) changed. 880 // Set to true if the scheme has (potentially) changed.
871 // In that case, the default port may also have changed and we need 881 // In that case, the default port may also have changed and we need
872 // to check even the existing port. 882 // to check even the existing port.
873 bool schemeChanged = false; 883 bool schemeChanged = false;
874 if (scheme != null) { 884 if (scheme != null) {
875 scheme = _makeScheme(scheme, scheme.length); 885 scheme = _makeScheme(scheme, 0, scheme.length);
876 schemeChanged = true; 886 schemeChanged = true;
877 } else { 887 } else {
878 scheme = this.scheme; 888 scheme = this.scheme;
879 } 889 }
880 bool isFile = (scheme == "file"); 890 bool isFile = (scheme == "file");
881 if (userInfo != null) { 891 if (userInfo != null) {
882 userInfo = _makeUserInfo(userInfo, 0, userInfo.length); 892 userInfo = _makeUserInfo(userInfo, 0, userInfo.length);
883 } else { 893 } else {
884 userInfo = this.userInfo; 894 userInfo = this.userInfo;
885 } 895 }
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after
1093 buffer.write(slice); 1103 buffer.write(slice);
1094 } 1104 }
1095 return buffer.toString(); 1105 return buffer.toString();
1096 } 1106 }
1097 1107
1098 /** 1108 /**
1099 * Validates scheme characters and does case-normalization. 1109 * Validates scheme characters and does case-normalization.
1100 * 1110 *
1101 * Schemes are converted to lower case. They cannot contain escapes. 1111 * Schemes are converted to lower case. They cannot contain escapes.
1102 */ 1112 */
1103 static String _makeScheme(String scheme, int end) { 1113 static String _makeScheme(String scheme, int start, int end) {
1104 if (end == 0) return ""; 1114 if (start == end) return "";
1105 final int firstCodeUnit = scheme.codeUnitAt(0); 1115 final int firstCodeUnit = scheme.codeUnitAt(start);
1106 if (!_isAlphabeticCharacter(firstCodeUnit)) { 1116 if (!_isAlphabeticCharacter(firstCodeUnit)) {
1107 _fail(scheme, 0, "Scheme not starting with alphabetic character"); 1117 _fail(scheme, start, "Scheme not starting with alphabetic character");
1108 } 1118 }
1109 bool allLowercase = firstCodeUnit >= _LOWER_CASE_A; 1119 bool allLowercase = firstCodeUnit >= _LOWER_CASE_A;
1110 for (int i = 0; i < end; i++) { 1120 for (int i = start; i < end; i++) {
1111 final int codeUnit = scheme.codeUnitAt(i); 1121 final int codeUnit = scheme.codeUnitAt(i);
1112 if (!_isSchemeCharacter(codeUnit)) { 1122 if (!_isSchemeCharacter(codeUnit)) {
1113 _fail(scheme, i, "Illegal scheme character"); 1123 _fail(scheme, i, "Illegal scheme character");
1114 } 1124 }
1115 if (codeUnit < _LOWER_CASE_A || codeUnit > _LOWER_CASE_Z) { 1125 if (codeUnit < _LOWER_CASE_A || codeUnit > _LOWER_CASE_Z) {
1116 allLowercase = false; 1126 allLowercase = false;
1117 } 1127 }
1118 } 1128 }
1119 scheme = scheme.substring(0, end); 1129 scheme = scheme.substring(start, end);
1120 if (!allLowercase) scheme = scheme.toLowerCase(); 1130 if (!allLowercase) scheme = scheme.toLowerCase();
1121 return scheme; 1131 return scheme;
1122 } 1132 }
1123 1133
1124 static String _makeUserInfo(String userInfo, int start, int end) { 1134 static String _makeUserInfo(String userInfo, int start, int end) {
1125 if (userInfo == null) return ""; 1135 if (userInfo == null) return "";
1126 return _normalize(userInfo, start, end, _userinfoTable); 1136 return _normalize(userInfo, start, end, _userinfoTable);
1127 } 1137 }
1128 1138
1129 static String _makePath(String path, int start, int end, 1139 static String _makePath(String path, int start, int end,
(...skipping 1268 matching lines...) Expand 10 before | Expand all | Expand 10 after
2398 0xafff, // 0x30 - 0x3f 1111111111110101 2408 0xafff, // 0x30 - 0x3f 1111111111110101
2399 // @ABCDEFGHIJKLMNO 2409 // @ABCDEFGHIJKLMNO
2400 0xffff, // 0x40 - 0x4f 1111111111111111 2410 0xffff, // 0x40 - 0x4f 1111111111111111
2401 // PQRSTUVWXYZ _ 2411 // PQRSTUVWXYZ _
2402 0x87ff, // 0x50 - 0x5f 1111111111100001 2412 0x87ff, // 0x50 - 0x5f 1111111111100001
2403 // abcdefghijklmno 2413 // abcdefghijklmno
2404 0xfffe, // 0x60 - 0x6f 0111111111111111 2414 0xfffe, // 0x60 - 0x6f 0111111111111111
2405 // pqrstuvwxyz ~ 2415 // pqrstuvwxyz ~
2406 0x47ff]; // 0x70 - 0x7f 1111111111100010 2416 0x47ff]; // 0x70 - 0x7f 1111111111100010
2407 } 2417 }
OLD | NEW
« no previous file with comments | « no previous file | tests/corelib/uri_parse_test.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698