sdk/lib/core/uri.dart - Issue 1409053007: More optimization of _uriEncode.

Side by Side Diff: sdk/lib/core/uri.dart

Issue 1409053007: More optimization of _uriEncode. (Closed) Base URL: https://github.com/dart-lang/sdk.git@master

Patch Set: Call directly to specialized UTF-8 version. Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of dart.core;	5 part of dart.core;

6	6

7 /**	7 /**

8 * A parsed URI, such as a URL.	8 * A parsed URI, such as a URL.

9 *	9 *

10 * See also:	10 * See also:

(...skipping 1224 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1235 bool isFile = (scheme == "file");	1235 bool isFile = (scheme == "file");

1236 bool ensureLeadingSlash = isFile \|\| hasAuthority;	1236 bool ensureLeadingSlash = isFile \|\| hasAuthority;

1237 if (path == null && pathSegments == null) return isFile ? "/" : "";	1237 if (path == null && pathSegments == null) return isFile ? "/" : "";

1238 if (path != null && pathSegments != null) {	1238 if (path != null && pathSegments != null) {

1239 throw new ArgumentError('Both path and pathSegments specified');	1239 throw new ArgumentError('Both path and pathSegments specified');

1240 }	1240 }

1241 var result;	1241 var result;

1242 if (path != null) {	1242 if (path != null) {

1243 result = _normalize(path, start, end, _pathCharOrSlashTable);	1243 result = _normalize(path, start, end, _pathCharOrSlashTable);

1244 } else {	1244 } else {

1245 result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/");	1245 result = pathSegments.map((s) =>

	1246 _uriEncodeUtf8(_pathCharTable, s, false)).join("/");

1246 }	1247 }

1247 if (result.isEmpty) {	1248 if (result.isEmpty) {

1248 if (isFile) return "/";	1249 if (isFile) return "/";

1249 } else if (ensureLeadingSlash && !result.startsWith('/')) {	1250 } else if (ensureLeadingSlash && !result.startsWith('/')) {

1250 result = "/" + result;	1251 result = "/" + result;

1251 }	1252 }

1252 result = _normalizePath(result, scheme, hasAuthority);	1253 result = _normalizePath(result, scheme, hasAuthority);

1253 return result;	1254 return result;

1254 }	1255 }

1255	1256

(...skipping 689 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1945 * string.	1946 * string.

1946 *	1947 *

1947 * For encoding the query part consider using	1948 * For encoding the query part consider using

1948 * [encodeQueryComponent].	1949 * [encodeQueryComponent].

1949 *	1950 *

1950 * To avoid the need for explicitly encoding use the [pathSegments]	1951 * To avoid the need for explicitly encoding use the [pathSegments]

1951 * and [queryParameters] optional named arguments when constructing	1952 * and [queryParameters] optional named arguments when constructing

1952 * a [Uri].	1953 * a [Uri].

1953 */	1954 */

1954 static String encodeComponent(String component) {	1955 static String encodeComponent(String component) {

1955 return _uriEncode(_unreserved2396Table, component);	1956 return _uriEncodeUtf8(_unreserved2396Table, component, false);

1956 }	1957 }

1957	1958

1958 /**	1959 /**

1959 * Encode the string [component] according to the HTML 4.01 rules	1960 * Encode the string [component] according to the HTML 4.01 rules

1960 * for encoding the posting of a HTML form as a query string	1961 * for encoding the posting of a HTML form as a query string

1961 * component.	1962 * component.

1962 *	1963 *

1963 * Encode the string [component] according to the HTML 4.01 rules	1964 * Encode the string [component] according to the HTML 4.01 rules

1964 * for encoding the posting of a HTML form as a query string	1965 * for encoding the posting of a HTML form as a query string

1965 * component.	1966 * component.

(...skipping 17 matching lines...) Expand all Loading...
1983 *	1984 *

1984 * To avoid the need for explicitly encoding the query use the	1985 * To avoid the need for explicitly encoding the query use the

1985 * [queryParameters] optional named arguments when constructing a	1986 * [queryParameters] optional named arguments when constructing a

1986 * [Uri].	1987 * [Uri].

1987 *	1988 *

1988 * See http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2 for more	1989 * See http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2 for more

1989 * details.	1990 * details.

1990 */	1991 */

1991 static String encodeQueryComponent(String component,	1992 static String encodeQueryComponent(String component,

1992 {Encoding encoding: UTF8}) {	1993 {Encoding encoding: UTF8}) {

	1994 const int spaceToPlus = true;
	sra1 2015/11/03 17:28:42 checked mode error
1993 return _uriEncode(	1995 return _uriEncode(

1994 _unreservedTable, component, encoding: encoding, spaceToPlus: true);	1996 _unreservedTable, component, encoding, spaceToPlus);

1995 }	1997 }

1996	1998

1997 /**	1999 /**

1998 * Decodes the percent-encoding in [encodedComponent].	2000 * Decodes the percent-encoding in [encodedComponent].

1999 *	2001 *

2000 * Note that decoding a URI component might change its meaning as	2002 * Note that decoding a URI component might change its meaning as

2001 * some of the decoded characters could be characters which are	2003 * some of the decoded characters could be characters which are

2002 * delimiters for a given URI component type. Always split a URI	2004 * delimiters for a given URI component type. Always split a URI

2003 * component using the delimiters for the component before decoding	2005 * component using the delimiters for the component before decoding

2004 * the individual parts.	2006 * the individual parts.

(...skipping 23 matching lines...) Expand all Loading...
2028 /**	2030 /**

2029 * Encode the string [uri] using percent-encoding to make it	2031 * Encode the string [uri] using percent-encoding to make it

2030 * safe for literal use as a full URI.	2032 * safe for literal use as a full URI.

2031 *	2033 *

2032 * All characters except uppercase and lowercase letters, digits and	2034 * All characters except uppercase and lowercase letters, digits and

2033 * the characters `!#$&'()*+,-./:;=?@_~` are percent-encoded. This	2035 * the characters `!#$&'()*+,-./:;=?@_~` are percent-encoded. This

2034 * is the set of characters specified in ECMA-262 version 5.1 for	2036 * is the set of characters specified in ECMA-262 version 5.1 for

2035 * the encodeURI function.	2037 * the encodeURI function.

2036 */	2038 */

2037 static String encodeFull(String uri) {	2039 static String encodeFull(String uri) {

2038 return _uriEncode(_encodeFullTable, uri);	2040 return _uriEncodeUtf8(_encodeFullTable, uri, false);

2039 }	2041 }

2040	2042

2041 /**	2043 /**

2042 * Decodes the percent-encoding in [uri].	2044 * Decodes the percent-encoding in [uri].

2043 *	2045 *

2044 * Note that decoding a full URI might change its meaning as some of	2046 * Note that decoding a full URI might change its meaning as some of

2045 * the decoded characters could be reserved characters. In most	2047 * the decoded characters could be reserved characters. In most

2046 * cases an encoded URI should be parsed into components using	2048 * cases an encoded URI should be parsed into components using

2047 * [Uri.parse] before decoding the separate components.	2049 * [Uri.parse] before decoding the separate components.

2048 */	2050 */

(...skipping 185 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2234 static const int _DOT = 0x2E;	2236 static const int _DOT = 0x2E;

2235 static const int _SLASH = 0x2F;	2237 static const int _SLASH = 0x2F;

2236 static const int _ZERO = 0x30;	2238 static const int _ZERO = 0x30;

2237 static const int _NINE = 0x39;	2239 static const int _NINE = 0x39;

2238 static const int _COLON = 0x3A;	2240 static const int _COLON = 0x3A;

2239 static const int _LESS = 0x3C;	2241 static const int _LESS = 0x3C;

2240 static const int _GREATER = 0x3E;	2242 static const int _GREATER = 0x3E;

2241 static const int _QUESTION = 0x3F;	2243 static const int _QUESTION = 0x3F;

2242 static const int _AT_SIGN = 0x40;	2244 static const int _AT_SIGN = 0x40;

2243 static const int _UPPER_CASE_A = 0x41;	2245 static const int _UPPER_CASE_A = 0x41;

	2246 static const int _UPPER_CASE_E = 0x45;

2244 static const int _UPPER_CASE_F = 0x46;	2247 static const int _UPPER_CASE_F = 0x46;

2245 static const int _UPPER_CASE_Z = 0x5A;	2248 static const int _UPPER_CASE_Z = 0x5A;

2246 static const int _LEFT_BRACKET = 0x5B;	2249 static const int _LEFT_BRACKET = 0x5B;

2247 static const int _BACKSLASH = 0x5C;	2250 static const int _BACKSLASH = 0x5C;

2248 static const int _RIGHT_BRACKET = 0x5D;	2251 static const int _RIGHT_BRACKET = 0x5D;

2249 static const int _LOWER_CASE_A = 0x61;	2252 static const int _LOWER_CASE_A = 0x61;

2250 static const int _LOWER_CASE_F = 0x66;	2253 static const int _LOWER_CASE_F = 0x66;

2251 static const int _LOWER_CASE_Z = 0x7A;	2254 static const int _LOWER_CASE_Z = 0x7A;

2252 static const int _BAR = 0x7C;	2255 static const int _BAR = 0x7C;

2253	2256

	2257 static const String _hexDigits = "0123456789ABCDEF";

	2258

2254 /**	2259 /**

2255 * This is the internal implementation of JavaScript's encodeURI function.	2260 * This is the internal implementation of JavaScript's encodeURI function.

2256 * It encodes all characters in the string [text] except for those	2261 * It encodes all characters in the string [text] except for those

2257 * that appear in [canonicalTable], and returns the escaped string.	2262 * that appear in [canonicalTable], and returns the escaped string.

2258 */	2263 */

2259 static String _uriEncode(List<int> canonicalTable,	2264 static String _uriEncode(List<int> canonicalTable,

2260 String text,	2265 String text,

2261 {Encoding encoding: UTF8,	2266 Encoding encoding,

2262 bool spaceToPlus: false}) {	2267 bool spaceToPlus) {

2263 byteToHex(byte, buffer) {	2268 // Use a specialized encoder for known Unicode-compatible encodings.

2264 const String hex = '0123456789ABCDEF';	2269 // This avoids encoding the string first and then working on the bytes,

2265 buffer.writeCharCode(hex.codeUnitAt(byte >> 4));	2270 // and instead works directly on the code units of the string.

2266 buffer.writeCharCode(hex.codeUnitAt(byte & 0x0f));	2271 if (identical(encoding, UTF8)) {

	2272 return _uriEncodeUtf8(canonicalTable, text, spaceToPlus);

	2273 }

	2274 if (identical(encoding, LATIN1)) {

	2275 return _uriEncodeSubset(canonicalTable, text, 255, spaceToPlus);

	2276 }

	2277 if (identical(encoding, ASCII)) {

	2278 return _uriEncodeSubset(canonicalTable, text, 127, spaceToPlus);

2267 }	2279 }

2268	2280

2269 // Encode the string into bytes then generate an ASCII only string	2281 // Encode the string into bytes then generate an ASCII only string

2270 // by percent encoding selected bytes.	2282 // by percent encoding selected bytes.

	2283 var bytes = encoding.encode(text);

	2284 int i = 0;

	2285 noChange: {

	2286 while (i < bytes.length) {

	2287 int byte = bytes[i];

	2288 if (byte < 128 &&

	2289 ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) {

	2290 break noChange;

	2291 }

	2292 i++;

	2293 }

	2294 return text;

	2295 }

2271 StringBuffer result = new StringBuffer();	2296 StringBuffer result = new StringBuffer();

2272 var bytes = encoding.encode(text);	2297 for (int j = 0; j < i; j++) {

2273 for (int i = 0; i < bytes.length; i++) {	2298 result[j] = text.codeUnitAt(j);
	sra1 2015/11/03 17:28:42 StringBuffer does not have []=. Lasse Reichstein Nielsen 2015/11/03 18:04:08 Duh, should be writeCharCode. Obviously needs more tests.
2274 int byte = bytes[i];	2299 }

2275 if (byte < 128 &&	2300 while (true) {

2276 ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) {	2301 if (spaceToPlus && char == _SPACE) {

2277 result.writeCharCode(byte);

2278 } else if (spaceToPlus && byte == _SPACE) {

2279 result.writeCharCode(_PLUS);	2302 result.writeCharCode(_PLUS);

2280 } else {	2303 } else {

2281 result.writeCharCode(_PERCENT);	2304 result..writeCharCode(_PERCENT)

2282 byteToHex(byte, result);	2305 ..writeCharCode(_hexDigits.codeUnitAt(char >> 4))

	2306 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

	2307 }

	2308 noChange: { // See dartbug.com/21481

	2309 while (++i < text.length) {

	2310 char = text.codeUnitAt(i);

	2311 if (char < 128 &&

	2312 (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {

	2313 result.writeCharCode(char);

	2314 } else {

	2315 break noChange;

	2316 }

	2317 }

	2318 break;

2283 }	2319 }

2284 }	2320 }

2285 return result.toString();	2321 return result.toString();

	2322 }

	2323

	2324 /**

	2325 * Encodes a text where the encoding is a subset of Unicode.

	2326 *

	2327 * The subsets are either Latin-1 or US-ASCII, and they are distinguished

	2328 * by the [limit] parameter which is the maximal code point allowed

	2329 * by the encoding.

	2330 */

	2331 static String _uriEncodeSubset(List<int> canonicalTable, String text,

	2332 int limit, bool spaceToPlus) {

	2333 assert(limit == 127 \|\| limit == 255);

	2334 int i = 0;

	2335 int char;

	2336 noChange: { // See dartbug.com/21481

	2337 while (i < text.length) {

	2338 char = text.codeUnitAt(i);

	2339 if (char >= 128 \|\|

	2340 (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) {

	2341 break noChange;

	2342 }

	2343 i++;

	2344 }

	2345 return text;

	2346 }

	2347 StringBuffer result = new StringBuffer();

	2348 for (int j = 0; j < i; j++) {

	2349 result.writeCharCode(text.codeUnitAt(j));

	2350 }

	2351 while (true) {

	2352 if (char <= limit) {

	2353 if (spaceToPlus && char == _SPACE) {

	2354 result.writeCharCode(_PLUS);

	2355 } else {

	2356 result..writeCharCode(_PERCENT)

	2357 ..writeCharCode(_hexDigits.codeUnitAt(char >> 4))

	2358 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

	2359 }

	2360 } else {

	2361 if (limit == 255) {

	2362 throw new ArgumentError.value(

	2363 text, "Source contains non-Latin-1 characters.");

	2364 }

	2365 throw new ArgumentError.value(

	2366 text, "Source contains non-ASCII bytes.");

	2367 }

	2368 noChange: { // See dartbug.com/21481

	2369 while (++i < text.length) {

	2370 char = text.codeUnitAt(i);

	2371 if (char < 128 &&

	2372 (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {

	2373 result.writeCharCode(char);

	2374 } else {

	2375 break noChange;

	2376 }

	2377 }

	2378 break;

	2379 }

	2380 }

	2381 return result.toString();

	2382 }

	2383

	2384 static String _uriEncodeUtf8(List<int> canonicalTable, String text,

	2385 bool spaceToPlus) {

	2386 int i = 0;

	2387 int char;

	2388 noChange: { // See dartbug.com/21481

	2389 while (i < text.length) {

	2390 char = text.codeUnitAt(i);

	2391 if (char >= 128 \|\|

	2392 (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) {

	2393 break noChange;

	2394 }

	2395 i++;

	2396 }

	2397 return text;

	2398 }

	2399 StringBuffer result = new StringBuffer();

	2400 for (int j = 0; j < i; j++) {

	2401 result.writeCharCode(text.codeUnitAt(j));

	2402 }

	2403 while (true) {

	2404 if (char < 128) {

	2405 if (spaceToPlus && char == _SPACE) {

	2406 result.writeCharCode(_PLUS);

	2407 } else {

	2408 result..writeCharCode(_PERCENT)

	2409 ..writeCharCode(_ZERO + (char >> 4)) // Range 0-7.

	2410 ..writeCharCode(_hexDigits.codeUnitAt(char & 0x0F));

	2411 }

	2412 } else if (char < 0x800) {

	2413 result..writeCharCode(_PERCENT)

	2414 ..writeCharCode(_hexDigits.codeUnitAt(0xC + (char >> 10)))

	2415 ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))

	2416 ..writeCharCode(_PERCENT)

	2417 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))

	2418 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

	2419 } else {

	2420 assert(char < 0x10000); // UTF-16 code unit.

	2421 int next;

	2422 if (char & 0xFC00 != 0xD800 \|\|

	2423 i + 1 == text.length \|\|

	2424 (next = text.codeUnitAt(i + 1)) & 0xFC00 != 0xDC00) {

	2425 result..writeCharCode(_PERCENT)

	2426 ..writeCharCode(_UPPER_CASE_E)

	2427 ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF))

	2428 ..writeCharCode(_PERCENT)

	2429 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3)))

	2430 ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))

	2431 ..writeCharCode(_PERCENT)

	2432 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))

	2433 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

	2434 } else {

	2435 // Lead surrogate followed by tail surrogate.

	2436 char = 0x10000 + (((char & 0x3FF) << 10) \| (next & 0x3FF));

	2437 i++;

	2438 result..writeCharCode(_PERCENT)

	2439 ..writeCharCode(_UPPER_CASE_F)

	2440 ..writeCharCode(_ZERO + (char >> 18)) // Range 0..7

	2441 ..writeCharCode(_PERCENT)

	2442 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 16) & 3)))

	2443 ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF))

	2444 ..writeCharCode(_PERCENT)

	2445 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3)))

	2446 ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))

	2447 ..writeCharCode(_PERCENT)

	2448 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))

	2449 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

	2450 }

	2451 }

	2452 noChange: { // See dartbug.com/21481

	2453 while (++i < text.length) {

	2454 char = text.codeUnitAt(i);

	2455 if (char < 128 &&

	2456 (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {

	2457 result.writeCharCode(char);

	2458 } else {

	2459 break noChange;

	2460 }

	2461 }

	2462 // Return result.toString(), but move the return to the end of the

	2463 // function to appease analysis.

	2464 break;

	2465 }

	2466 }

	2467 return result.toString();

2286 }	2468 }

2287	2469

2288 /**	2470 /**

2289 * Convert a byte (2 character hex sequence) in string [s] starting	2471 * Convert a byte (2 character hex sequence) in string [s] starting

2290 * at position [pos] to its ordinal value	2472 * at position [pos] to its ordinal value

2291 */	2473 */

2292 static int _hexCharPairToByte(String s, int pos) {	2474 static int _hexCharPairToByte(String s, int pos) {

2293 int byte = 0;	2475 int byte = 0;

2294 for (int i = 0; i < 2; i++) {	2476 for (int i = 0; i < 2; i++) {

2295 var charCode = s.codeUnitAt(pos + i);	2477 var charCode = s.codeUnitAt(pos + i);

(...skipping 308 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2604 0xafff, // 0x30 - 0x3f 1111111111110101	2786 0xafff, // 0x30 - 0x3f 1111111111110101

2605 // @ABCDEFGHIJKLMNO	2787 // @ABCDEFGHIJKLMNO

2606 0xffff, // 0x40 - 0x4f 1111111111111111	2788 0xffff, // 0x40 - 0x4f 1111111111111111

2607 // PQRSTUVWXYZ _	2789 // PQRSTUVWXYZ _

2608 0x87ff, // 0x50 - 0x5f 1111111111100001	2790 0x87ff, // 0x50 - 0x5f 1111111111100001

2609 // abcdefghijklmno	2791 // abcdefghijklmno

2610 0xfffe, // 0x60 - 0x6f 0111111111111111	2792 0xfffe, // 0x60 - 0x6f 0111111111111111

2611 // pqrstuvwxyz ~	2793 // pqrstuvwxyz ~

2612 0x47ff]; // 0x70 - 0x7f 1111111111100010	2794 0x47ff]; // 0x70 - 0x7f 1111111111100010

2613 }	2795 }

OLD	NEW

« no previous file with comments | « sdk/lib/core/core.dart ('k') | no next file » | no next file with comments »