OLD | NEW |
---|---|
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.core; | 5 part of dart.core; |
6 | 6 |
7 /** | 7 /** |
8 * A parsed URI, such as a URL. | 8 * A parsed URI, such as a URL. |
9 * | 9 * |
10 * **See also:** | 10 * **See also:** |
(...skipping 1224 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1235 bool isFile = (scheme == "file"); | 1235 bool isFile = (scheme == "file"); |
1236 bool ensureLeadingSlash = isFile || hasAuthority; | 1236 bool ensureLeadingSlash = isFile || hasAuthority; |
1237 if (path == null && pathSegments == null) return isFile ? "/" : ""; | 1237 if (path == null && pathSegments == null) return isFile ? "/" : ""; |
1238 if (path != null && pathSegments != null) { | 1238 if (path != null && pathSegments != null) { |
1239 throw new ArgumentError('Both path and pathSegments specified'); | 1239 throw new ArgumentError('Both path and pathSegments specified'); |
1240 } | 1240 } |
1241 var result; | 1241 var result; |
1242 if (path != null) { | 1242 if (path != null) { |
1243 result = _normalize(path, start, end, _pathCharOrSlashTable); | 1243 result = _normalize(path, start, end, _pathCharOrSlashTable); |
1244 } else { | 1244 } else { |
1245 result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/"); | 1245 result = pathSegments.map((s) => |
1246 _uriEncodeUtf8(_pathCharTable, s, false)).join("/"); | |
1246 } | 1247 } |
1247 if (result.isEmpty) { | 1248 if (result.isEmpty) { |
1248 if (isFile) return "/"; | 1249 if (isFile) return "/"; |
1249 } else if (ensureLeadingSlash && !result.startsWith('/')) { | 1250 } else if (ensureLeadingSlash && !result.startsWith('/')) { |
1250 result = "/" + result; | 1251 result = "/" + result; |
1251 } | 1252 } |
1252 result = _normalizePath(result, scheme, hasAuthority); | 1253 result = _normalizePath(result, scheme, hasAuthority); |
1253 return result; | 1254 return result; |
1254 } | 1255 } |
1255 | 1256 |
(...skipping 689 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1945 * string. | 1946 * string. |
1946 * | 1947 * |
1947 * For encoding the query part consider using | 1948 * For encoding the query part consider using |
1948 * [encodeQueryComponent]. | 1949 * [encodeQueryComponent]. |
1949 * | 1950 * |
1950 * To avoid the need for explicitly encoding use the [pathSegments] | 1951 * To avoid the need for explicitly encoding use the [pathSegments] |
1951 * and [queryParameters] optional named arguments when constructing | 1952 * and [queryParameters] optional named arguments when constructing |
1952 * a [Uri]. | 1953 * a [Uri]. |
1953 */ | 1954 */ |
1954 static String encodeComponent(String component) { | 1955 static String encodeComponent(String component) { |
1955 return _uriEncode(_unreserved2396Table, component); | 1956 return _uriEncodeUtf8(_unreserved2396Table, component, false); |
1956 } | 1957 } |
1957 | 1958 |
1958 /** | 1959 /** |
1959 * Encode the string [component] according to the HTML 4.01 rules | 1960 * Encode the string [component] according to the HTML 4.01 rules |
1960 * for encoding the posting of a HTML form as a query string | 1961 * for encoding the posting of a HTML form as a query string |
1961 * component. | 1962 * component. |
1962 * | 1963 * |
1963 * Encode the string [component] according to the HTML 4.01 rules | 1964 * Encode the string [component] according to the HTML 4.01 rules |
1964 * for encoding the posting of a HTML form as a query string | 1965 * for encoding the posting of a HTML form as a query string |
1965 * component. | 1966 * component. |
(...skipping 17 matching lines...) Expand all Loading... | |
1983 * | 1984 * |
1984 * To avoid the need for explicitly encoding the query use the | 1985 * To avoid the need for explicitly encoding the query use the |
1985 * [queryParameters] optional named arguments when constructing a | 1986 * [queryParameters] optional named arguments when constructing a |
1986 * [Uri]. | 1987 * [Uri]. |
1987 * | 1988 * |
1988 * See http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2 for more | 1989 * See http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2 for more |
1989 * details. | 1990 * details. |
1990 */ | 1991 */ |
1991 static String encodeQueryComponent(String component, | 1992 static String encodeQueryComponent(String component, |
1992 {Encoding encoding: UTF8}) { | 1993 {Encoding encoding: UTF8}) { |
1994 const int spaceToPlus = true; | |
sra1
2015/11/03 17:28:42
checked mode error
| |
1993 return _uriEncode( | 1995 return _uriEncode( |
1994 _unreservedTable, component, encoding: encoding, spaceToPlus: true); | 1996 _unreservedTable, component, encoding, spaceToPlus); |
1995 } | 1997 } |
1996 | 1998 |
1997 /** | 1999 /** |
1998 * Decodes the percent-encoding in [encodedComponent]. | 2000 * Decodes the percent-encoding in [encodedComponent]. |
1999 * | 2001 * |
2000 * Note that decoding a URI component might change its meaning as | 2002 * Note that decoding a URI component might change its meaning as |
2001 * some of the decoded characters could be characters which are | 2003 * some of the decoded characters could be characters which are |
2002 * delimiters for a given URI component type. Always split a URI | 2004 * delimiters for a given URI component type. Always split a URI |
2003 * component using the delimiters for the component before decoding | 2005 * component using the delimiters for the component before decoding |
2004 * the individual parts. | 2006 * the individual parts. |
(...skipping 23 matching lines...) Expand all Loading... | |
2028 /** | 2030 /** |
2029 * Encode the string [uri] using percent-encoding to make it | 2031 * Encode the string [uri] using percent-encoding to make it |
2030 * safe for literal use as a full URI. | 2032 * safe for literal use as a full URI. |
2031 * | 2033 * |
2032 * All characters except uppercase and lowercase letters, digits and | 2034 * All characters except uppercase and lowercase letters, digits and |
2033 * the characters `!#$&'()*+,-./:;=?@_~` are percent-encoded. This | 2035 * the characters `!#$&'()*+,-./:;=?@_~` are percent-encoded. This |
2034 * is the set of characters specified in ECMA-262 version 5.1 for | 2036 * is the set of characters specified in ECMA-262 version 5.1 for |
2035 * the encodeURI function. | 2037 * the encodeURI function. |
2036 */ | 2038 */ |
2037 static String encodeFull(String uri) { | 2039 static String encodeFull(String uri) { |
2038 return _uriEncode(_encodeFullTable, uri); | 2040 return _uriEncodeUtf8(_encodeFullTable, uri, false); |
2039 } | 2041 } |
2040 | 2042 |
2041 /** | 2043 /** |
2042 * Decodes the percent-encoding in [uri]. | 2044 * Decodes the percent-encoding in [uri]. |
2043 * | 2045 * |
2044 * Note that decoding a full URI might change its meaning as some of | 2046 * Note that decoding a full URI might change its meaning as some of |
2045 * the decoded characters could be reserved characters. In most | 2047 * the decoded characters could be reserved characters. In most |
2046 * cases an encoded URI should be parsed into components using | 2048 * cases an encoded URI should be parsed into components using |
2047 * [Uri.parse] before decoding the separate components. | 2049 * [Uri.parse] before decoding the separate components. |
2048 */ | 2050 */ |
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2234 static const int _DOT = 0x2E; | 2236 static const int _DOT = 0x2E; |
2235 static const int _SLASH = 0x2F; | 2237 static const int _SLASH = 0x2F; |
2236 static const int _ZERO = 0x30; | 2238 static const int _ZERO = 0x30; |
2237 static const int _NINE = 0x39; | 2239 static const int _NINE = 0x39; |
2238 static const int _COLON = 0x3A; | 2240 static const int _COLON = 0x3A; |
2239 static const int _LESS = 0x3C; | 2241 static const int _LESS = 0x3C; |
2240 static const int _GREATER = 0x3E; | 2242 static const int _GREATER = 0x3E; |
2241 static const int _QUESTION = 0x3F; | 2243 static const int _QUESTION = 0x3F; |
2242 static const int _AT_SIGN = 0x40; | 2244 static const int _AT_SIGN = 0x40; |
2243 static const int _UPPER_CASE_A = 0x41; | 2245 static const int _UPPER_CASE_A = 0x41; |
2246 static const int _UPPER_CASE_E = 0x45; | |
2244 static const int _UPPER_CASE_F = 0x46; | 2247 static const int _UPPER_CASE_F = 0x46; |
2245 static const int _UPPER_CASE_Z = 0x5A; | 2248 static const int _UPPER_CASE_Z = 0x5A; |
2246 static const int _LEFT_BRACKET = 0x5B; | 2249 static const int _LEFT_BRACKET = 0x5B; |
2247 static const int _BACKSLASH = 0x5C; | 2250 static const int _BACKSLASH = 0x5C; |
2248 static const int _RIGHT_BRACKET = 0x5D; | 2251 static const int _RIGHT_BRACKET = 0x5D; |
2249 static const int _LOWER_CASE_A = 0x61; | 2252 static const int _LOWER_CASE_A = 0x61; |
2250 static const int _LOWER_CASE_F = 0x66; | 2253 static const int _LOWER_CASE_F = 0x66; |
2251 static const int _LOWER_CASE_Z = 0x7A; | 2254 static const int _LOWER_CASE_Z = 0x7A; |
2252 static const int _BAR = 0x7C; | 2255 static const int _BAR = 0x7C; |
2253 | 2256 |
2257 static const String _hexDigits = "0123456789ABCDEF"; | |
2258 | |
2254 /** | 2259 /** |
2255 * This is the internal implementation of JavaScript's encodeURI function. | 2260 * This is the internal implementation of JavaScript's encodeURI function. |
2256 * It encodes all characters in the string [text] except for those | 2261 * It encodes all characters in the string [text] except for those |
2257 * that appear in [canonicalTable], and returns the escaped string. | 2262 * that appear in [canonicalTable], and returns the escaped string. |
2258 */ | 2263 */ |
2259 static String _uriEncode(List<int> canonicalTable, | 2264 static String _uriEncode(List<int> canonicalTable, |
2260 String text, | 2265 String text, |
2261 {Encoding encoding: UTF8, | 2266 Encoding encoding, |
2262 bool spaceToPlus: false}) { | 2267 bool spaceToPlus) { |
2263 byteToHex(byte, buffer) { | 2268 // Use a specialized encoder for known Unicode-compatible encodings. |
2264 const String hex = '0123456789ABCDEF'; | 2269 // This avoids encoding the string first and then working on the bytes, |
2265 buffer.writeCharCode(hex.codeUnitAt(byte >> 4)); | 2270 // and instead works directly on the code units of the string. |
2266 buffer.writeCharCode(hex.codeUnitAt(byte & 0x0f)); | 2271 if (identical(encoding, UTF8)) { |
2272 return _uriEncodeUtf8(canonicalTable, text, spaceToPlus); | |
2273 } | |
2274 if (identical(encoding, LATIN1)) { | |
2275 return _uriEncodeSubset(canonicalTable, text, 255, spaceToPlus); | |
2276 } | |
2277 if (identical(encoding, ASCII)) { | |
2278 return _uriEncodeSubset(canonicalTable, text, 127, spaceToPlus); | |
2267 } | 2279 } |
2268 | 2280 |
2269 // Encode the string into bytes then generate an ASCII only string | 2281 // Encode the string into bytes then generate an ASCII only string |
2270 // by percent encoding selected bytes. | 2282 // by percent encoding selected bytes. |
2283 var bytes = encoding.encode(text); | |
2284 int i = 0; | |
2285 noChange: { | |
2286 while (i < bytes.length) { | |
2287 int byte = bytes[i]; | |
2288 if (byte < 128 && | |
2289 ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) { | |
2290 break noChange; | |
2291 } | |
2292 i++; | |
2293 } | |
2294 return text; | |
2295 } | |
2271 StringBuffer result = new StringBuffer(); | 2296 StringBuffer result = new StringBuffer(); |
2272 var bytes = encoding.encode(text); | 2297 for (int j = 0; j < i; j++) { |
2273 for (int i = 0; i < bytes.length; i++) { | 2298 result[j] = text.codeUnitAt(j); |
sra1
2015/11/03 17:28:42
StringBuffer does not have []=.
Lasse Reichstein Nielsen
2015/11/03 18:04:08
Duh, should be writeCharCode.
Obviously needs mor
| |
2274 int byte = bytes[i]; | 2299 } |
2275 if (byte < 128 && | 2300 while (true) { |
2276 ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) { | 2301 if (spaceToPlus && char == _SPACE) { |
2277 result.writeCharCode(byte); | |
2278 } else if (spaceToPlus && byte == _SPACE) { | |
2279 result.writeCharCode(_PLUS); | 2302 result.writeCharCode(_PLUS); |
2280 } else { | 2303 } else { |
2281 result.writeCharCode(_PERCENT); | 2304 result..writeCharCode(_PERCENT) |
2282 byteToHex(byte, result); | 2305 ..writeCharCode(_hexDigits.codeUnitAt(char >> 4)) |
2306 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | |
2307 } | |
2308 noChange: { // See dartbug.com/21481 | |
2309 while (++i < text.length) { | |
2310 char = text.codeUnitAt(i); | |
2311 if (char < 128 && | |
2312 (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) { | |
2313 result.writeCharCode(char); | |
2314 } else { | |
2315 break noChange; | |
2316 } | |
2317 } | |
2318 break; | |
2283 } | 2319 } |
2284 } | 2320 } |
2285 return result.toString(); | 2321 return result.toString(); |
2322 } | |
2323 | |
2324 /** | |
2325 * Encodes a text where the encoding is a subset of Unicode. | |
2326 * | |
2327 * The subsets are either Latin-1 or US-ASCII, and they are distinguished | |
2328 * by the [limit] parameter which is the maximal code point allowed | |
2329 * by the encoding. | |
2330 */ | |
2331 static String _uriEncodeSubset(List<int> canonicalTable, String text, | |
2332 int limit, bool spaceToPlus) { | |
2333 assert(limit == 127 || limit == 255); | |
2334 int i = 0; | |
2335 int char; | |
2336 noChange: { // See dartbug.com/21481 | |
2337 while (i < text.length) { | |
2338 char = text.codeUnitAt(i); | |
2339 if (char >= 128 || | |
2340 (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) { | |
2341 break noChange; | |
2342 } | |
2343 i++; | |
2344 } | |
2345 return text; | |
2346 } | |
2347 StringBuffer result = new StringBuffer(); | |
2348 for (int j = 0; j < i; j++) { | |
2349 result.writeCharCode(text.codeUnitAt(j)); | |
2350 } | |
2351 while (true) { | |
2352 if (char <= limit) { | |
2353 if (spaceToPlus && char == _SPACE) { | |
2354 result.writeCharCode(_PLUS); | |
2355 } else { | |
2356 result..writeCharCode(_PERCENT) | |
2357 ..writeCharCode(_hexDigits.codeUnitAt(char >> 4)) | |
2358 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | |
2359 } | |
2360 } else { | |
2361 if (limit == 255) { | |
2362 throw new ArgumentError.value( | |
2363 text, "Source contains non-Latin-1 characters."); | |
2364 } | |
2365 throw new ArgumentError.value( | |
2366 text, "Source contains non-ASCII bytes."); | |
2367 } | |
2368 noChange: { // See dartbug.com/21481 | |
2369 while (++i < text.length) { | |
2370 char = text.codeUnitAt(i); | |
2371 if (char < 128 && | |
2372 (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) { | |
2373 result.writeCharCode(char); | |
2374 } else { | |
2375 break noChange; | |
2376 } | |
2377 } | |
2378 break; | |
2379 } | |
2380 } | |
2381 return result.toString(); | |
2382 } | |
2383 | |
2384 static String _uriEncodeUtf8(List<int> canonicalTable, String text, | |
2385 bool spaceToPlus) { | |
2386 int i = 0; | |
2387 int char; | |
2388 noChange: { // See dartbug.com/21481 | |
2389 while (i < text.length) { | |
2390 char = text.codeUnitAt(i); | |
2391 if (char >= 128 || | |
2392 (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) { | |
2393 break noChange; | |
2394 } | |
2395 i++; | |
2396 } | |
2397 return text; | |
2398 } | |
2399 StringBuffer result = new StringBuffer(); | |
2400 for (int j = 0; j < i; j++) { | |
2401 result.writeCharCode(text.codeUnitAt(j)); | |
2402 } | |
2403 while (true) { | |
2404 if (char < 128) { | |
2405 if (spaceToPlus && char == _SPACE) { | |
2406 result.writeCharCode(_PLUS); | |
2407 } else { | |
2408 result..writeCharCode(_PERCENT) | |
2409 ..writeCharCode(_ZERO + (char >> 4)) // Range 0-7. | |
2410 ..writeCharCode(_hexDigits.codeUnitAt(char & 0x0F)); | |
2411 } | |
2412 } else if (char < 0x800) { | |
2413 result..writeCharCode(_PERCENT) | |
2414 ..writeCharCode(_hexDigits.codeUnitAt(0xC + (char >> 10))) | |
2415 ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF)) | |
2416 ..writeCharCode(_PERCENT) | |
2417 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3))) | |
2418 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | |
2419 } else { | |
2420 assert(char < 0x10000); // UTF-16 code unit. | |
2421 int next; | |
2422 if (char & 0xFC00 != 0xD800 || | |
2423 i + 1 == text.length || | |
2424 (next = text.codeUnitAt(i + 1)) & 0xFC00 != 0xDC00) { | |
2425 result..writeCharCode(_PERCENT) | |
2426 ..writeCharCode(_UPPER_CASE_E) | |
2427 ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF)) | |
2428 ..writeCharCode(_PERCENT) | |
2429 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3))) | |
2430 ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF)) | |
2431 ..writeCharCode(_PERCENT) | |
2432 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3))) | |
2433 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | |
2434 } else { | |
2435 // Lead surrogate followed by tail surrogate. | |
2436 char = 0x10000 + (((char & 0x3FF) << 10) | (next & 0x3FF)); | |
2437 i++; | |
2438 result..writeCharCode(_PERCENT) | |
2439 ..writeCharCode(_UPPER_CASE_F) | |
2440 ..writeCharCode(_ZERO + (char >> 18)) // Range 0..7 | |
2441 ..writeCharCode(_PERCENT) | |
2442 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 16) & 3))) | |
2443 ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF)) | |
2444 ..writeCharCode(_PERCENT) | |
2445 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3))) | |
2446 ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF)) | |
2447 ..writeCharCode(_PERCENT) | |
2448 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3))) | |
2449 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | |
2450 } | |
2451 } | |
2452 noChange: { // See dartbug.com/21481 | |
2453 while (++i < text.length) { | |
2454 char = text.codeUnitAt(i); | |
2455 if (char < 128 && | |
2456 (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) { | |
2457 result.writeCharCode(char); | |
2458 } else { | |
2459 break noChange; | |
2460 } | |
2461 } | |
2462 // Return result.toString(), but move the return to the end of the | |
2463 // function to appease analysis. | |
2464 break; | |
2465 } | |
2466 } | |
2467 return result.toString(); | |
2286 } | 2468 } |
2287 | 2469 |
2288 /** | 2470 /** |
2289 * Convert a byte (2 character hex sequence) in string [s] starting | 2471 * Convert a byte (2 character hex sequence) in string [s] starting |
2290 * at position [pos] to its ordinal value | 2472 * at position [pos] to its ordinal value |
2291 */ | 2473 */ |
2292 static int _hexCharPairToByte(String s, int pos) { | 2474 static int _hexCharPairToByte(String s, int pos) { |
2293 int byte = 0; | 2475 int byte = 0; |
2294 for (int i = 0; i < 2; i++) { | 2476 for (int i = 0; i < 2; i++) { |
2295 var charCode = s.codeUnitAt(pos + i); | 2477 var charCode = s.codeUnitAt(pos + i); |
(...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2604 0xafff, // 0x30 - 0x3f 1111111111110101 | 2786 0xafff, // 0x30 - 0x3f 1111111111110101 |
2605 // @ABCDEFGHIJKLMNO | 2787 // @ABCDEFGHIJKLMNO |
2606 0xffff, // 0x40 - 0x4f 1111111111111111 | 2788 0xffff, // 0x40 - 0x4f 1111111111111111 |
2607 // PQRSTUVWXYZ _ | 2789 // PQRSTUVWXYZ _ |
2608 0x87ff, // 0x50 - 0x5f 1111111111100001 | 2790 0x87ff, // 0x50 - 0x5f 1111111111100001 |
2609 // abcdefghijklmno | 2791 // abcdefghijklmno |
2610 0xfffe, // 0x60 - 0x6f 0111111111111111 | 2792 0xfffe, // 0x60 - 0x6f 0111111111111111 |
2611 // pqrstuvwxyz ~ | 2793 // pqrstuvwxyz ~ |
2612 0x47ff]; // 0x70 - 0x7f 1111111111100010 | 2794 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
2613 } | 2795 } |
OLD | NEW |