Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(778)

Side by Side Diff: sdk/lib/core/uri.dart

Issue 321543003: New, more validating, parser for URI. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Add validation and normalization to URI components. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « pkg/json_rpc_2/test/server/parameters_test.dart ('k') | tests/corelib/uri_test.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.core; 5 part of dart.core;
6 6
7 /** 7 /**
8 * A parsed URI, such as a URL. 8 * A parsed URI, such as a URL.
9 * 9 *
10 * **See also:** 10 * **See also:**
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 // segment = *pchar 165 // segment = *pchar
166 // segment-nz = 1*pchar 166 // segment-nz = 1*pchar
167 // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 167 // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
168 // ; non-zero-length segment without any colon ":" 168 // ; non-zero-length segment without any colon ":"
169 // 169 //
170 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 170 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
171 // 171 //
172 // query = *( pchar / "/" / "?" ) 172 // query = *( pchar / "/" / "?" )
173 // 173 //
174 // fragment = *( pchar / "/" / "?" ) 174 // fragment = *( pchar / "/" / "?" )
175 bool isRegName(int ch) {
176 return ch < 128 && ((_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
177 }
178 175
179 int ipV6Address(int index) { 176 int ipV6Address(int index) {
180 // IPv6. Skip to ']'. 177 // IPv6. Skip to ']'.
181 index = uri.indexOf(']', index); 178 index = uri.indexOf(']', index);
182 if (index == -1) { 179 if (index == -1) {
183 throw new FormatException("Bad end of IPv6 host"); 180 throw new FormatException("Bad end of IPv6 host");
184 } 181 }
185 return index + 1; 182 return index + 1;
186 } 183 }
187 184
188 int length = uri.length; 185 int length = uri.length;
189 int index = 0; 186 int index = 0;
190 187
191 int schemeEndIndex = 0; 188 int schemeEndIndex = 0;
192 189
193 if (length == 0) { 190 if (length == 0) {
194 return new Uri(); 191 return new Uri();
195 } 192 }
193 // Whether to allow a colon in the first path segment.
194 bool allowColon = false;
196 195
197 if (uri.codeUnitAt(0) != _SLASH) { 196 if (_isAlphabeticCharacter(uri.codeUnitAt(0))) {
198 // Can be scheme. 197 // Can be scheme.
199 while (index < length) { 198 while (index < length) {
200 // Look for ':'. If found, continue from the post of ':'. If not (end 199 // Look for ':' to end the scheme.
201 // reached or invalid scheme char found) back up one char, and continue 200 // If found continue from after ':'.
202 // to path. 201 // If not (end reached or invalid scheme char found) back up one char,
202 // and continue as a path.
203 // Note that scheme-chars is contained in path-chars. 203 // Note that scheme-chars is contained in path-chars.
204 int codeUnit = uri.codeUnitAt(index++); 204 int codeUnit = uri.codeUnitAt(index++);
205 if (!_isSchemeCharacter(codeUnit)) { 205 if (!_isSchemeCharacter(codeUnit)) {
206 if (codeUnit == _COLON) { 206 if (codeUnit == _COLON) {
207 schemeEndIndex = index; 207 schemeEndIndex = index;
208 allowColon = true; // Scheme detected, allow colon in path.
208 } else { 209 } else {
209 // Back up one char, since we met an invalid scheme char. 210 // Back up one char, since we met an invalid scheme char.
210 index--; 211 index--;
211 } 212 }
212 break; 213 break;
213 } 214 }
214 } 215 }
215 } 216 }
216 217
217 int userInfoEndIndex = -1; 218 int userInfoEndIndex = -1;
218 int portIndex = -1; 219 int portIndex = -1;
219 int authorityEndIndex = schemeEndIndex; 220 int authorityEndIndex = schemeEndIndex;
220 // If we see '//', there must be an authority. 221 // If we see '//', there must be an authority.
221 if (authorityEndIndex == index && 222 if (authorityEndIndex == index &&
222 authorityEndIndex + 1 < length && 223 authorityEndIndex + 1 < length &&
223 uri.codeUnitAt(authorityEndIndex) == _SLASH && 224 uri.codeUnitAt(authorityEndIndex) == _SLASH &&
224 uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) { 225 uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) {
225 // Skip '//'. 226 // Skip '//'.
227 allowColon = true; // First slash seen, allow colon in path.
226 authorityEndIndex += 2; 228 authorityEndIndex += 2;
227 // It can both be host and userInfo. 229 // It can both be host and userInfo.
228 while (authorityEndIndex < length) { 230 while (authorityEndIndex < length) {
229 int codeUnit = uri.codeUnitAt(authorityEndIndex++); 231 int codeUnit = uri.codeUnitAt(authorityEndIndex++);
230 if (!isRegName(codeUnit)) { 232 if (!_isRegNameChar(codeUnit)) {
231 if (codeUnit == _LEFT_BRACKET) { 233 if (codeUnit == _LEFT_BRACKET) {
232 authorityEndIndex = ipV6Address(authorityEndIndex); 234 authorityEndIndex = ipV6Address(authorityEndIndex);
233 } else if (portIndex == -1 && codeUnit == _COLON) { 235 } else if (portIndex == -1 && codeUnit == _COLON) {
234 // First time ':'. 236 // First time ':'.
235 portIndex = authorityEndIndex; 237 portIndex = authorityEndIndex;
236 } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) { 238 } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) {
237 // Second time ':' or first '@'. Must be userInfo. 239 // Second time ':' or first '@'. Must be userInfo.
238 userInfoEndIndex = uri.indexOf('@', authorityEndIndex - 1); 240 if (codeUnit == _AT_SIGN) {
239 // Not found. Must be path then. 241 userInfoEndIndex = authorityEndIndex - 1;
240 if (userInfoEndIndex == -1) { 242 } else {
241 authorityEndIndex = index; 243 userInfoEndIndex = uri.indexOf('@', authorityEndIndex);
242 break; 244 // @ Not found after something that can only be userinfo.
245 if (userInfoEndIndex < 0) {
246 _fail(uri, uri.length, "No '@' after userinfo");
247 }
243 } 248 }
244 portIndex = -1; 249 portIndex = -1;
245 authorityEndIndex = userInfoEndIndex + 1; 250 authorityEndIndex = userInfoEndIndex + 1;
246 // Now it can only be host:port. 251 // Now it can only be host:port.
247 while (authorityEndIndex < length) { 252 while (authorityEndIndex < length) {
248 int codeUnit = uri.codeUnitAt(authorityEndIndex++); 253 int codeUnit = uri.codeUnitAt(authorityEndIndex++);
249 if (!isRegName(codeUnit)) { 254 if (!_isRegNameChar(codeUnit)) {
250 if (codeUnit == _LEFT_BRACKET) { 255 if (codeUnit == _LEFT_BRACKET) {
251 authorityEndIndex = ipV6Address(authorityEndIndex); 256 authorityEndIndex = ipV6Address(authorityEndIndex);
252 } else if (codeUnit == _COLON) { 257 } else if (codeUnit == _COLON) {
253 if (portIndex != -1) { 258 if (portIndex != -1) {
254 throw new FormatException("Double port in host"); 259 throw new FormatException("Double port in host");
255 } 260 }
256 portIndex = authorityEndIndex; 261 portIndex = authorityEndIndex;
257 } else { 262 } else {
258 authorityEndIndex--; 263 authorityEndIndex--;
259 break; 264 break;
260 } 265 }
261 } 266 }
262 } 267 }
263 break; 268 break;
264 } else { 269 } else {
265 authorityEndIndex--; 270 authorityEndIndex--;
266 break; 271 break;
267 } 272 }
268 } 273 }
269 } 274 }
275 if (authorityEndIndex < length) {
276 // path-abempty - either absolute or empty, so we need a slash if
277 // there is a path.
278 int codeUnit = uri.codeUnitAt(authorityEndIndex);
279 if (codeUnit != _SLASH &&
280 codeUnit != _QUESTION &&
281 codeUnit != _NUMBER_SIGN) {
282 _fail(uri, authorityEndIndex, "Invalid character in authority");
283 }
284 }
270 } else { 285 } else {
271 authorityEndIndex = schemeEndIndex; 286 authorityEndIndex = schemeEndIndex;
272 } 287 }
273 288
274 // At path now. 289 // At path now.
275 int pathEndIndex = authorityEndIndex; 290 int pathEndIndex = authorityEndIndex;
291 if (!allowColon) {
292 while (pathEndIndex < length) {
293 int codeUnit = uri.codeUnitAt(pathEndIndex++);
294 if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) {
295 pathEndIndex--;
296 break;
297 }
298 if (codeUnit == _SLASH) break;
299 if (codeUnit == _COLON) {
300 _fail(uri, pathEndIndex - 1, "Colon in initial path segment");
Søren Gjesse 2014/06/13 10:13:16 This is when at least one of the characters before
Lasse Reichstein Nielsen 2014/06/13 11:53:04 Yes. If there is no scheme and the path isn't abso
301 }
302 }
303 }
276 while (pathEndIndex < length) { 304 while (pathEndIndex < length) {
277 int codeUnit = uri.codeUnitAt(pathEndIndex++); 305 int codeUnit = uri.codeUnitAt(pathEndIndex++);
278 if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) { 306 if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) {
279 pathEndIndex--; 307 pathEndIndex--;
280 break; 308 break;
281 } 309 }
282 } 310 }
283 311
284 // Maybe query. 312 // Maybe query.
285 int queryEndIndex = pathEndIndex; 313 int queryEndIndex = pathEndIndex;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
333 361
334 return new Uri(scheme: scheme, 362 return new Uri(scheme: scheme,
335 userInfo: userInfo, 363 userInfo: userInfo,
336 host: host, 364 host: host,
337 port: port, 365 port: port,
338 path: path, 366 path: path,
339 query: query, 367 query: query,
340 fragment: fragment); 368 fragment: fragment);
341 } 369 }
342 370
371 // Report a parse failure.
372 static void _fail(String uri, int index, String message) {
Søren Gjesse 2014/06/13 10:13:16 Thank you for the nice error messages!
Lasse Reichstein Nielsen 2014/06/13 11:53:04 I'm considering if we can put something like this
373 // TODO(lrn): Consider adding this to FormatException.
374 if (index == uri.length) {
375 message += " at end of input.";
376 } else {
377 message += " at position $index.\n";
378 // Pick a slice of uri containing index and, if
379 // necessary, truncate the ends to ensure the entire
380 // slice fits on one line.
381 int min = 0;
382 int max = uri.length;
383 String pre = "";
384 String post = "";
385 if (uri.length > 78) {
386 min = index - 10;
387 if (min < 0) min = 0;
388 int max = min + 72;
389 if (max > uri.length) {
390 max = uri.length;
391 min = max - 72;
392 }
393 if (min != 0) pre = "...";
394 if (max != uri.length) post = "...";
395 }
396 // Combine message, slice and a caret pointing to the error index.
397 message = "$message$pre${uri.substring(min, max)}$post\n"
398 "${' ' * (pre.length + index - min)}^";
399 }
400 throw new FormatException(message);
401 }
402
343 /** 403 /**
344 * Creates a new URI from its components. 404 * Creates a new URI from its components.
345 * 405 *
346 * Each component is set through a named argument. Any number of 406 * Each component is set through a named argument. Any number of
347 * components can be provided. The default value for the components 407 * components can be provided. The default value for the components
348 * not provided is the empty string, except for [port] which has a 408 * not provided is the empty string, except for [port] which has a
349 * default value of 0. The [path] and [query] components can be set 409 * default value of 0. The [path] and [query] components can be set
350 * using two different named arguments. 410 * using two different named arguments.
351 * 411 *
352 * The scheme component is set through [scheme]. The scheme is 412 * The scheme component is set through [scheme]. The scheme is
(...skipping 27 matching lines...) Expand all
380 * expected to be fully percent-encoded and is used in its literal 440 * expected to be fully percent-encoded and is used in its literal
381 * form. When [queryParameters] is used the query is built from the 441 * form. When [queryParameters] is used the query is built from the
382 * provided map. Each key and value in the map is percent-encoded 442 * provided map. Each key and value in the map is percent-encoded
383 * and joined using equal and ampersand characters. The 443 * and joined using equal and ampersand characters. The
384 * percent-encoding of the keys and values encodes all characters 444 * percent-encoding of the keys and values encodes all characters
385 * except for the unreserved characters. 445 * except for the unreserved characters.
386 * 446 *
387 * The fragment component is set through [fragment]. 447 * The fragment component is set through [fragment].
388 */ 448 */
389 Uri({String scheme, 449 Uri({String scheme,
390 this.userInfo: "", 450 String userInfo: "",
391 String host: "", 451 String host: "",
392 port: 0, 452 port: 0,
393 String path, 453 String path,
394 Iterable<String> pathSegments, 454 Iterable<String> pathSegments,
395 String query, 455 String query,
396 Map<String, String> queryParameters, 456 Map<String, String> queryParameters,
397 fragment: ""}) : 457 fragment: ""}) :
398 scheme = _makeScheme(scheme), 458 scheme = _makeScheme(scheme),
459 userInfo = _makeUserInfo(userInfo),
399 _host = _makeHost(host), 460 _host = _makeHost(host),
400 query = _makeQuery(query, queryParameters), 461 query = _makeQuery(query, queryParameters),
401 fragment = _makeFragment(fragment) { 462 fragment = _makeFragment(fragment) {
402 // Perform scheme specific normalization. 463 // Perform scheme specific normalization.
403 if (scheme == "http" && port == 80) { 464 if (scheme == "http" && port == 80) {
404 _port = 0; 465 _port = 0;
405 } else if (scheme == "https" && port == 443) { 466 } else if (scheme == "https" && port == 443) {
406 _port = 0; 467 _port = 0;
407 } else { 468 } else {
408 _port = port; 469 _port = port;
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after
760 */ 821 */
761 Map<String, String> get queryParameters { 822 Map<String, String> get queryParameters {
762 if (_queryParameters == null) { 823 if (_queryParameters == null) {
763 _queryParameters = new UnmodifiableMapView(splitQueryString(query)); 824 _queryParameters = new UnmodifiableMapView(splitQueryString(query));
764 } 825 }
765 return _queryParameters; 826 return _queryParameters;
766 } 827 }
767 828
768 static String _makeHost(String host) { 829 static String _makeHost(String host) {
769 if (host == null || host.isEmpty) return host; 830 if (host == null || host.isEmpty) return host;
831 // Host is an IPv6 address if it starts with '[' or contains a colon.
770 if (host.codeUnitAt(0) == _LEFT_BRACKET) { 832 if (host.codeUnitAt(0) == _LEFT_BRACKET) {
771 if (host.codeUnitAt(host.length - 1) != _RIGHT_BRACKET) { 833 if (host.codeUnitAt(host.length - 1) != _RIGHT_BRACKET) {
772 throw new FormatException('Missing end `]` to match `[` in host'); 834 throw new FormatException('Missing end `]` to match `[` in host');
773 } 835 }
774 parseIPv6Address(host.substring(1, host.length - 1)); 836 parseIPv6Address(host.substring(1, host.length - 1));
775 return host; 837 return host;
776 } 838 }
839 // TODO(lrn): skip if too short to be a valid IPv6 address.
777 for (int i = 0; i < host.length; i++) { 840 for (int i = 0; i < host.length; i++) {
778 if (host.codeUnitAt(i) == _COLON) { 841 if (host.codeUnitAt(i) == _COLON) {
779 parseIPv6Address(host); 842 parseIPv6Address(host);
780 return '[$host]'; 843 return '[$host]';
781 } 844 }
782 } 845 }
783 return host; 846 return _normalizeRegName(host);
784 } 847 }
785 848
786 static String _makeScheme(String scheme) { 849 static bool _isRegNameChar(int char) {
787 bool isSchemeLowerCharacter(int ch) { 850 return char < 127 && (_regNameTable[char >> 4] & (1 << (char & 0xf))) != 0;
788 return ch < 128 && 851 }
789 ((_schemeLowerTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
790 }
791 852
792 if (scheme == null) return ""; 853 /**
793 bool allLowercase = true; 854 * Validates and does case- and percent-encoding normalization.
794 int length = scheme.length; 855 *
795 for (int i = 0; i < length; i++) { 856 * The [host] must be an RFC3986 "reg-name". It is converted
796 int codeUnit = scheme.codeUnitAt(i); 857 * to lower case, and percent escapes are converted to either
797 if (i == 0 && !_isAlphabeticCharacter(codeUnit)) { 858 * lower case unreserved characters or upper case escapes.
798 // First code unit must be an alphabetic character. 859 */
799 throw new ArgumentError('Illegal scheme: $scheme'); 860 static String _normalizeRegName(String host) {
800 } 861 StringBuffer buffer;
801 if (!isSchemeLowerCharacter(codeUnit)) { 862 int sectionStart = 0;
802 if (_isSchemeCharacter(codeUnit)) { 863 int index = 0;
803 allLowercase = false; 864 // Whether all characters between sectionStart and index are normalized.
804 } else { 865 bool isNormalized = true;
805 throw new ArgumentError('Illegal scheme: $scheme'); 866
867 while (index < host.length) {
868 int char = host.codeUnitAt(index);
869 if (char == _PERCENT) {
870 // The _regNameTable contains "%", so we check that first.
871 String replacement = _normalizeEscape(host, index, true);
872 if (replacement == null && isNormalized) {
873 index += 3;
874 continue;
806 } 875 }
876 if (buffer == null) buffer = new StringBuffer();
877 String slice = host.substring(sectionStart, index);
878 if (!isNormalized) slice = slice.toLowerCase();
879 buffer.write(slice);
880 if (replacement == null) replacement = host.substring(index, index + 3);
881 buffer.write(replacement);
882 index += 3;
883 sectionStart = index;
884 isNormalized = true;
885 } else if (_isRegNameChar(char)) {
886 if (isNormalized && _UPPER_CASE_A <= char && _UPPER_CASE_Z >= char) {
887 // Put initial slice in buffer and continue in non-normalized mode
888 if (buffer == null) buffer = new StringBuffer();
889 if (sectionStart < index) {
890 buffer.write(host.substring(sectionStart, index));
891 sectionStart = index;
892 }
893 isNormalized = false;
894 }
895 index++;
896 } else {
897 _fail(host, index, "Invalid character");
807 } 898 }
808 } 899 }
900 if (buffer == null) return host;
901 if (sectionStart < host.length) {
902 String slice = host.substring(sectionStart);
903 if (!isNormalized) slice = slice.toLowerCase();
904 buffer.write(slice);
905 }
906 return buffer.toString();
907 }
809 908
909 /**
910 * Validates scheme characters and does case-normalization.
911 *
912 * Schemes are converted to lower case. They cannot contain
Søren Gjesse 2014/06/13 10:13:16 Missing end of sentence.
Lasse Reichstein Nielsen 2014/06/13 11:53:04 Done.
913 */
914 static String _makeScheme(String scheme) {
915 if (scheme == null || scheme.isEmpty) return "";
916 int char = scheme.codeUnitAt(0);
917 if (!_isAlphabeticCharacter(char)) {
918 _fail(scheme, 0, "Non-alphabetic character starting scheme");
919 }
920 bool allLowercase = char > _LOWER_CASE_A;
921 for (int i = 0; i < scheme.length; i++) {
922 int codeUnit = scheme.codeUnitAt(i);
923 if (!_isSchemeCharacter(codeUnit)) {
924 _fail(scheme, i, "Illegal scheme character");
925 }
926 if (_LOWER_CASE_A <= codeUnit && _LOWER_CASE_Z >= codeUnit) {
927 allLowercase = false;
928 }
929 }
810 return allLowercase ? scheme : scheme.toLowerCase(); 930 return allLowercase ? scheme : scheme.toLowerCase();
811 } 931 }
812 932
933 static String _makeUserInfo(String userInfo) {
934 if (userInfo == null) return "null";
935 return _normalize(userInfo, _userinfoTable);
936 }
937
938 static bool _isPathCharacter(int ch) {
939 return ch < 128 && ((_pathCharTable[ch >> 4] & (1 << (ch & 0x0f))) != 0) ||
940 ch == _SLASH;
941 }
942
813 String _makePath(String path, Iterable<String> pathSegments) { 943 String _makePath(String path, Iterable<String> pathSegments) {
814 if (path == null && pathSegments == null) return ""; 944 if (path == null && pathSegments == null) return "";
815 if (path != null && pathSegments != null) { 945 if (path != null && pathSegments != null) {
816 throw new ArgumentError('Both path and pathSegments specified'); 946 throw new ArgumentError('Both path and pathSegments specified');
817 } 947 }
948 // TODO(lrn): Do path normalization to remove /./ and /../ segments.
818 var result; 949 var result;
819 if (path != null) { 950 if (path != null) {
820 result = _normalize(path); 951 result = _normalize(path, _pathCharOrSlashTable);
821 } else { 952 } else {
822 result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/"); 953 result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/");
823 } 954 }
824 if ((hasAuthority || (scheme == "file")) && 955 if ((hasAuthority || (scheme == "file")) &&
825 result.isNotEmpty && !result.startsWith("/")) { 956 result.isNotEmpty && !result.startsWith("/")) {
826 return "/$result"; 957 return "/$result";
827 } 958 }
828 return result; 959 return result;
829 } 960 }
830 961
831 static String _makeQuery(String query, Map<String, String> queryParameters) { 962 static String _makeQuery(String query, Map<String, String> queryParameters) {
832 if (query == null && queryParameters == null) return ""; 963 if (query == null && queryParameters == null) return "";
833 if (query != null && queryParameters != null) { 964 if (query != null && queryParameters != null) {
834 throw new ArgumentError('Both query and queryParameters specified'); 965 throw new ArgumentError('Both query and queryParameters specified');
835 } 966 }
836 if (query != null) return _normalize(query); 967 if (query != null) return _normalize(query, _queryCharTable);
837 968
838 var result = new StringBuffer(); 969 var result = new StringBuffer();
839 var first = true; 970 var first = true;
840 queryParameters.forEach((key, value) { 971 queryParameters.forEach((key, value) {
841 if (!first) { 972 if (!first) {
842 result.write("&"); 973 result.write("&");
843 } 974 }
844 first = false; 975 first = false;
845 result.write(Uri.encodeQueryComponent(key)); 976 result.write(Uri.encodeQueryComponent(key));
846 if (value != null && !value.isEmpty) { 977 if (value != null && !value.isEmpty) {
847 result.write("="); 978 result.write("=");
848 result.write(Uri.encodeQueryComponent(value)); 979 result.write(Uri.encodeQueryComponent(value));
849 } 980 }
850 }); 981 });
851 return result.toString(); 982 return result.toString();
852 } 983 }
853 984
854 static String _makeFragment(String fragment) { 985 static String _makeFragment(String fragment) {
855 if (fragment == null) return ""; 986 if (fragment == null) return "";
856 return _normalize(fragment); 987 return _normalize(fragment, _queryCharTable);
857 } 988 }
858 989
859 static String _normalize(String component) { 990 static bool _isLowerCaseHexDigit(int digit) {
860 int index = component.indexOf('%'); 991 return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F;
861 if (index < 0) return component; 992 }
862 993
863 bool isNormalizedHexDigit(int digit) { 994 /** Returns whether char is a hex digit. */
864 return (_ZERO <= digit && digit <= _NINE) || 995 static bool _isHexDigit(int char) {
865 (_UPPER_CASE_A <= digit && digit <= _UPPER_CASE_F); 996 if (_NINE >= char) return _ZERO <= char;
997 char |= 0x20;
998 return _LOWER_CASE_A <= char && _LOWER_CASE_F >= char;
999 }
1000
1001 /** Returns value of char as hex digit. */
1002 static int _hexValue(int digit) {
1003 assert(_isHexDigit(digit));
1004 if (_NINE >= digit) return digit - _ZERO;
1005 return (digit | 0x20) - (_LOWER_CASE_A - 10);
1006 }
1007
1008 /**
1009 * Performs RFC 3986 Percent-Encoding Normalization.
1010 *
1011 * Returns a replacement string that should replace the original escape.
1012 * Returns null if no replacement is necessary because the escape is
1013 * not for an unreserved character and is already non-lower-case.
1014 *
1015 * If [lowerCase] is true, a single character returned is always lower case.
1016 */
1017 static String _normalizeEscape(String source, int index, bool lowerCase) {
1018 assert(source.codeUnitAt(index) == _PERCENT);
1019 if (index + 2 >= source.length) {
1020 _fail(source, index, "Unterminated percent escape");
866 } 1021 }
1022 int firstDigit = source.codeUnitAt(index + 1);
1023 int secondDigit = source.codeUnitAt(index + 2);
1024 if (!_isHexDigit(firstDigit) || !_isHexDigit(secondDigit)) {
1025 _fail(source, index, "Invalid escape");
1026 }
1027 int value = _hexValue(firstDigit) * 16 + _hexValue(secondDigit);
1028 if (_isUnreservedChar(value)) {
1029 if (lowerCase && _UPPER_CASE_A <= value && _UPPER_CASE_Z >= value) {
1030 value |= 0x20;
1031 }
1032 return new String.fromCharCode(value);
1033 }
1034 if (firstDigit >= _LOWER_CASE_A || secondDigit >= _LOWER_CASE_A) {
1035 // Either digit is lower case.
1036 return source.substring(index, index + 3).toUpperCase();
1037 }
1038 return null;
1039 }
867 1040
868 bool isLowerCaseHexDigit(int digit) { 1041 static bool _isUnreservedChar(int ch) {
869 return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F; 1042 return ch < 127 &&
870 } 1043 ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
1044 }
871 1045
872 bool isUnreserved(int ch) {
873 return ch < 128 &&
874 ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
875 }
876 1046
877 int normalizeHexDigit(int index) { 1047 /**
878 var codeUnit = component.codeUnitAt(index); 1048 * Runs through component checking that each character is valid and
879 if (isLowerCaseHexDigit(codeUnit)) { 1049 * normalizes percent escapes.
880 return codeUnit - 0x20; 1050 *
881 } else if (!isNormalizedHexDigit(codeUnit)) { 1051 * Uses [charTable] to check if a non-`%` character is allowed.
882 throw new ArgumentError("Invalid URI component: $component"); 1052 * Each `%` character must be followed by two hex digits.
1053 * If the hex-digits are lower case letters, they are converted to
1054 * upper case.
1055 */
1056 static String _normalize(String component, List<int> charTable) {
1057 StringBuffer buffer;
1058 int sectionStart = 0;
1059 int index = 0;
1060 // Loop while characters are valid and escapes correct and upper-case.
1061 while (index < component.length) {
1062 int char = component.codeUnitAt(index);
1063 if (char < 127 && (charTable[char >> 4] & (1 << (char & 0x0f))) != 0) {
1064 index++;
1065 } else if (char == _PERCENT) {
1066 String replacement = _normalizeEscape(component, index, false);
1067 if (replacement == null) {
1068 // _normalizeEscape returns null if no replacement necessary.
1069 index += 3;
1070 continue;
1071 } else {
1072 if (buffer == null) buffer = new StringBuffer();
1073 buffer.write(component.substring(sectionStart, index));
1074 buffer.write(replacement);
1075 index += 3;
1076 sectionStart = index;
1077 }
883 } else { 1078 } else {
884 return codeUnit; 1079 _fail(component, index, "Invalid character");
885 } 1080 }
886 } 1081 }
887 1082 if (buffer == null) return component;
888 int decodeHexDigitPair(int index) { 1083 if (sectionStart < component.length) {
889 int byte = 0; 1084 buffer.write(component.substring(sectionStart));
890 for (int i = 0; i < 2; i++) {
891 var codeUnit = component.codeUnitAt(index + i);
892 if (_ZERO <= codeUnit && codeUnit <= _NINE) {
893 byte = byte * 16 + codeUnit - _ZERO;
894 } else {
895 // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66).
896 codeUnit |= 0x20;
897 if (_LOWER_CASE_A <= codeUnit &&
898 codeUnit <= _LOWER_CASE_F) {
899 byte = byte * 16 + codeUnit - _LOWER_CASE_A + 10;
900 } else {
901 throw new ArgumentError(
902 "Invalid percent-encoding in URI component: $component");
903 }
904 }
905 }
906 return byte;
907 } 1085 }
908 1086 return buffer.toString();
909 // Start building the normalized component string.
910 StringBuffer result;
911 int length = component.length;
912 int prevIndex = 0;
913
914 // Copy a part of the component string to the result.
915 void fillResult() {
916 if (result == null) {
917 assert(prevIndex == 0);
918 result = new StringBuffer(component.substring(prevIndex, index));
919 } else {
920 result.write(component.substring(prevIndex, index));
921 }
922 }
923
924 while (index < length) {
925 // Normalize percent-encoding to uppercase and don't encode
926 // unreserved characters.
927 assert(component.codeUnitAt(index) == _PERCENT);
928 if (length < index + 2) {
929 throw new ArgumentError(
930 "Invalid percent-encoding in URI component: $component");
931 }
932
933 var codeUnit1 = component.codeUnitAt(index + 1);
934 var codeUnit2 = component.codeUnitAt(index + 2);
935 var decodedCodeUnit = decodeHexDigitPair(index + 1);
936 if (isNormalizedHexDigit(codeUnit1) &&
937 isNormalizedHexDigit(codeUnit2) &&
938 !isUnreserved(decodedCodeUnit)) {
939 index += 3;
940 } else {
941 fillResult();
942 if (isUnreserved(decodedCodeUnit)) {
943 result.writeCharCode(decodedCodeUnit);
944 } else {
945 result.write("%");
946 result.writeCharCode(normalizeHexDigit(index + 1));
947 result.writeCharCode(normalizeHexDigit(index + 2));
948 }
949 index += 3;
950 prevIndex = index;
951 }
952 int next = component.indexOf('%', index);
953 if (next >= index) {
954 index = next;
955 } else {
956 index = length;
957 }
958 }
959 if (result == null) return component;
960
961 if (result != null && prevIndex != index) fillResult();
962 assert(index == length);
963
964 return result.toString();
965 } 1087 }
966 1088
967 static bool _isSchemeCharacter(int ch) { 1089 static bool _isSchemeCharacter(int ch) {
968 return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); 1090 return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
969 } 1091 }
970 1092
971
972 /** 1093 /**
973 * Returns whether the URI is absolute. 1094 * Returns whether the URI is absolute.
974 */ 1095 */
975 bool get isAbsolute => scheme != "" && fragment == ""; 1096 bool get isAbsolute => scheme != "" && fragment == "";
976 1097
977 String _merge(String base, String reference) { 1098 String _merge(String base, String reference) {
978 if (base == "") return "/$reference"; 1099 if (base == "") return "/$reference";
979 return "${base.substring(0, base.lastIndexOf("/") + 1)}$reference"; 1100 return "${base.substring(0, base.lastIndexOf("/") + 1)}$reference";
980 } 1101 }
981 1102
(...skipping 852 matching lines...) Expand 10 before | Expand all | Expand 10 after
1834 0x2bff, // 0x30 - 0x3f 1111111111010100 1955 0x2bff, // 0x30 - 0x3f 1111111111010100
1835 // ABCDEFGHIJKLMNO 1956 // ABCDEFGHIJKLMNO
1836 0xfffe, // 0x40 - 0x4f 0111111111111111 1957 0xfffe, // 0x40 - 0x4f 0111111111111111
1837 // PQRSTUVWXYZ _ 1958 // PQRSTUVWXYZ _
1838 0x87ff, // 0x50 - 0x5f 1111111111100001 1959 0x87ff, // 0x50 - 0x5f 1111111111100001
1839 // abcdefghijklmno 1960 // abcdefghijklmno
1840 0xfffe, // 0x60 - 0x6f 0111111111111111 1961 0xfffe, // 0x60 - 0x6f 0111111111111111
1841 // pqrstuvwxyz ~ 1962 // pqrstuvwxyz ~
1842 0x47ff]; // 0x70 - 0x7f 1111111111100010 1963 0x47ff]; // 0x70 - 0x7f 1111111111100010
1843 1964
1965 // Characters allowed in the userinfo as of RFC 3986.
1966 // RFC 3986 Appendix A
1967 // userinfo = *( unreserved / pct-encoded / sub-delims / ':')
1968 static const _userinfoTable = const [
1969 // LSB MSB
1970 // | |
1971 0x0000, // 0x00 - 0x0f 0000000000000000
1972 0x0000, // 0x10 - 0x1f 0000000000000000
1973 // ! $ &'()*+,-.
1974 0x7fd2, // 0x20 - 0x2f 0100101111111110
1975 // 0123456789:; =
1976 0x2fff, // 0x30 - 0x3f 1111111111110100
1977 // ABCDEFGHIJKLMNO
1978 0xfffe, // 0x40 - 0x4f 0111111111111111
1979 // PQRSTUVWXYZ _
1980 0x87ff, // 0x50 - 0x5f 1111111111100001
1981 // abcdefghijklmno
1982 0xfffe, // 0x60 - 0x6f 0111111111111111
1983 // pqrstuvwxyz ~
1984 0x47ff]; // 0x70 - 0x7f 1111111111100010
1985
1844 // Characters allowed in the path as of RFC 3986. 1986 // Characters allowed in the path as of RFC 3986.
1845 // RFC 3986 section 3.3. 1987 // RFC 3986 section 3.3.
1846 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 1988 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
1847 static const _pathCharTable = const [ 1989 static const _pathCharTable = const [
1848 // LSB MSB 1990 // LSB MSB
1849 // | | 1991 // | |
1850 0x0000, // 0x00 - 0x0f 0000000000000000 1992 0x0000, // 0x00 - 0x0f 0000000000000000
1851 0x0000, // 0x10 - 0x1f 0000000000000000 1993 0x0000, // 0x10 - 0x1f 0000000000000000
1852 // ! $ &'()*+,-. 1994 // ! $ &'()*+,-.
1853 0x7fd2, // 0x20 - 0x2f 0100101111111110 1995 0x7fd2, // 0x20 - 0x2f 0100101111111110
1854 // 0123456789:; = 1996 // 0123456789:; =
1855 0x2fff, // 0x30 - 0x3f 1111111111110100 1997 0x2fff, // 0x30 - 0x3f 1111111111110100
1856 // @ABCDEFGHIJKLMNO 1998 // @ABCDEFGHIJKLMNO
1857 0xffff, // 0x40 - 0x4f 1111111111111111 1999 0xffff, // 0x40 - 0x4f 1111111111111111
1858 // PQRSTUVWXYZ _ 2000 // PQRSTUVWXYZ _
1859 0x87ff, // 0x50 - 0x5f 1111111111100001 2001 0x87ff, // 0x50 - 0x5f 1111111111100001
1860 // abcdefghijklmno 2002 // abcdefghijklmno
1861 0xfffe, // 0x60 - 0x6f 0111111111111111 2003 0xfffe, // 0x60 - 0x6f 0111111111111111
1862 // pqrstuvwxyz ~ 2004 // pqrstuvwxyz ~
1863 0x47ff]; // 0x70 - 0x7f 1111111111100010 2005 0x47ff]; // 0x70 - 0x7f 1111111111100010
1864 2006
2007 // Characters allowed in the path as of RFC 3986.
2008 // RFC 3986 section 3.3 *and* slash.
2009 static const _pathCharOrSlashTable = const [
2010 // LSB MSB
2011 // | |
2012 0x0000, // 0x00 - 0x0f 0000000000000000
2013 0x0000, // 0x10 - 0x1f 0000000000000000
2014 // ! $ &'()*+,-./
2015 0xffd2, // 0x20 - 0x2f 0100101111111111
2016 // 0123456789:; =
2017 0x2fff, // 0x30 - 0x3f 1111111111110100
2018 // @ABCDEFGHIJKLMNO
2019 0xffff, // 0x40 - 0x4f 1111111111111111
2020 // PQRSTUVWXYZ _
2021 0x87ff, // 0x50 - 0x5f 1111111111100001
2022 // abcdefghijklmno
2023 0xfffe, // 0x60 - 0x6f 0111111111111111
2024 // pqrstuvwxyz ~
2025 0x47ff]; // 0x70 - 0x7f 1111111111100010
2026
1865 // Characters allowed in the query as of RFC 3986. 2027 // Characters allowed in the query as of RFC 3986.
1866 // RFC 3986 section 3.4. 2028 // RFC 3986 section 3.4.
1867 // query = *( pchar / "/" / "?" ) 2029 // query = *( pchar / "/" / "?" )
1868 static const _queryCharTable = const [ 2030 static const _queryCharTable = const [
1869 // LSB MSB 2031 // LSB MSB
1870 // | | 2032 // | |
1871 0x0000, // 0x00 - 0x0f 0000000000000000 2033 0x0000, // 0x00 - 0x0f 0000000000000000
1872 0x0000, // 0x10 - 0x1f 0000000000000000 2034 0x0000, // 0x10 - 0x1f 0000000000000000
1873 // ! $ &'()*+,-./ 2035 // ! $ &'()*+,-./
1874 0xffd2, // 0x20 - 0x2f 0100101111111111 2036 0xffd2, // 0x20 - 0x2f 0100101111111111
1875 // 0123456789:; = ? 2037 // 0123456789:; = ?
1876 0xafff, // 0x30 - 0x3f 1111111111110101 2038 0xafff, // 0x30 - 0x3f 1111111111110101
1877 // @ABCDEFGHIJKLMNO 2039 // @ABCDEFGHIJKLMNO
1878 0xffff, // 0x40 - 0x4f 1111111111111111 2040 0xffff, // 0x40 - 0x4f 1111111111111111
1879 // PQRSTUVWXYZ _ 2041 // PQRSTUVWXYZ _
1880 0x87ff, // 0x50 - 0x5f 1111111111100001 2042 0x87ff, // 0x50 - 0x5f 1111111111100001
1881 // abcdefghijklmno 2043 // abcdefghijklmno
1882 0xfffe, // 0x60 - 0x6f 0111111111111111 2044 0xfffe, // 0x60 - 0x6f 0111111111111111
1883 // pqrstuvwxyz ~ 2045 // pqrstuvwxyz ~
1884 0x47ff]; // 0x70 - 0x7f 1111111111100010 2046 0x47ff]; // 0x70 - 0x7f 1111111111100010
1885 } 2047 }
OLDNEW
« no previous file with comments | « pkg/json_rpc_2/test/server/parameters_test.dart ('k') | tests/corelib/uri_test.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698