Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(154)

Side by Side Diff: sdk/lib/core/uri.dart

Issue 1409053007: More optimization of _uriEncode. (Closed) Base URL: https://github.com/dart-lang/sdk.git@master
Patch Set: Call directly to specialized UTF-8 version. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « sdk/lib/core/core.dart ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.core; 5 part of dart.core;
6 6
7 /** 7 /**
8 * A parsed URI, such as a URL. 8 * A parsed URI, such as a URL.
9 * 9 *
10 * **See also:** 10 * **See also:**
(...skipping 1224 matching lines...) Expand 10 before | Expand all | Expand 10 after
1235 bool isFile = (scheme == "file"); 1235 bool isFile = (scheme == "file");
1236 bool ensureLeadingSlash = isFile || hasAuthority; 1236 bool ensureLeadingSlash = isFile || hasAuthority;
1237 if (path == null && pathSegments == null) return isFile ? "/" : ""; 1237 if (path == null && pathSegments == null) return isFile ? "/" : "";
1238 if (path != null && pathSegments != null) { 1238 if (path != null && pathSegments != null) {
1239 throw new ArgumentError('Both path and pathSegments specified'); 1239 throw new ArgumentError('Both path and pathSegments specified');
1240 } 1240 }
1241 var result; 1241 var result;
1242 if (path != null) { 1242 if (path != null) {
1243 result = _normalize(path, start, end, _pathCharOrSlashTable); 1243 result = _normalize(path, start, end, _pathCharOrSlashTable);
1244 } else { 1244 } else {
1245 result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/"); 1245 result = pathSegments.map((s) =>
1246 _uriEncodeUtf8(_pathCharTable, s, false)).join("/");
1246 } 1247 }
1247 if (result.isEmpty) { 1248 if (result.isEmpty) {
1248 if (isFile) return "/"; 1249 if (isFile) return "/";
1249 } else if (ensureLeadingSlash && !result.startsWith('/')) { 1250 } else if (ensureLeadingSlash && !result.startsWith('/')) {
1250 result = "/" + result; 1251 result = "/" + result;
1251 } 1252 }
1252 result = _normalizePath(result, scheme, hasAuthority); 1253 result = _normalizePath(result, scheme, hasAuthority);
1253 return result; 1254 return result;
1254 } 1255 }
1255 1256
(...skipping 689 matching lines...) Expand 10 before | Expand all | Expand 10 after
1945 * string. 1946 * string.
1946 * 1947 *
1947 * For encoding the query part consider using 1948 * For encoding the query part consider using
1948 * [encodeQueryComponent]. 1949 * [encodeQueryComponent].
1949 * 1950 *
1950 * To avoid the need for explicitly encoding use the [pathSegments] 1951 * To avoid the need for explicitly encoding use the [pathSegments]
1951 * and [queryParameters] optional named arguments when constructing 1952 * and [queryParameters] optional named arguments when constructing
1952 * a [Uri]. 1953 * a [Uri].
1953 */ 1954 */
1954 static String encodeComponent(String component) { 1955 static String encodeComponent(String component) {
1955 return _uriEncode(_unreserved2396Table, component); 1956 return _uriEncodeUtf8(_unreserved2396Table, component, false);
1956 } 1957 }
1957 1958
1958 /** 1959 /**
1959 * Encode the string [component] according to the HTML 4.01 rules 1960 * Encode the string [component] according to the HTML 4.01 rules
1960 * for encoding the posting of a HTML form as a query string 1961 * for encoding the posting of a HTML form as a query string
1961 * component. 1962 * component.
1962 * 1963 *
1963 * Encode the string [component] according to the HTML 4.01 rules 1964 * Encode the string [component] according to the HTML 4.01 rules
1964 * for encoding the posting of a HTML form as a query string 1965 * for encoding the posting of a HTML form as a query string
1965 * component. 1966 * component.
(...skipping 17 matching lines...) Expand all
1983 * 1984 *
1984 * To avoid the need for explicitly encoding the query use the 1985 * To avoid the need for explicitly encoding the query use the
1985 * [queryParameters] optional named arguments when constructing a 1986 * [queryParameters] optional named arguments when constructing a
1986 * [Uri]. 1987 * [Uri].
1987 * 1988 *
1988 * See http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2 for more 1989 * See http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2 for more
1989 * details. 1990 * details.
1990 */ 1991 */
1991 static String encodeQueryComponent(String component, 1992 static String encodeQueryComponent(String component,
1992 {Encoding encoding: UTF8}) { 1993 {Encoding encoding: UTF8}) {
1994 const int spaceToPlus = true;
sra1 2015/11/03 17:28:42 checked mode error
1993 return _uriEncode( 1995 return _uriEncode(
1994 _unreservedTable, component, encoding: encoding, spaceToPlus: true); 1996 _unreservedTable, component, encoding, spaceToPlus);
1995 } 1997 }
1996 1998
1997 /** 1999 /**
1998 * Decodes the percent-encoding in [encodedComponent]. 2000 * Decodes the percent-encoding in [encodedComponent].
1999 * 2001 *
2000 * Note that decoding a URI component might change its meaning as 2002 * Note that decoding a URI component might change its meaning as
2001 * some of the decoded characters could be characters which are 2003 * some of the decoded characters could be characters which are
2002 * delimiters for a given URI component type. Always split a URI 2004 * delimiters for a given URI component type. Always split a URI
2003 * component using the delimiters for the component before decoding 2005 * component using the delimiters for the component before decoding
2004 * the individual parts. 2006 * the individual parts.
(...skipping 23 matching lines...) Expand all
2028 /** 2030 /**
2029 * Encode the string [uri] using percent-encoding to make it 2031 * Encode the string [uri] using percent-encoding to make it
2030 * safe for literal use as a full URI. 2032 * safe for literal use as a full URI.
2031 * 2033 *
2032 * All characters except uppercase and lowercase letters, digits and 2034 * All characters except uppercase and lowercase letters, digits and
2033 * the characters `!#$&'()*+,-./:;=?@_~` are percent-encoded. This 2035 * the characters `!#$&'()*+,-./:;=?@_~` are percent-encoded. This
2034 * is the set of characters specified in ECMA-262 version 5.1 for 2036 * is the set of characters specified in ECMA-262 version 5.1 for
2035 * the encodeURI function. 2037 * the encodeURI function.
2036 */ 2038 */
2037 static String encodeFull(String uri) { 2039 static String encodeFull(String uri) {
2038 return _uriEncode(_encodeFullTable, uri); 2040 return _uriEncodeUtf8(_encodeFullTable, uri, false);
2039 } 2041 }
2040 2042
2041 /** 2043 /**
2042 * Decodes the percent-encoding in [uri]. 2044 * Decodes the percent-encoding in [uri].
2043 * 2045 *
2044 * Note that decoding a full URI might change its meaning as some of 2046 * Note that decoding a full URI might change its meaning as some of
2045 * the decoded characters could be reserved characters. In most 2047 * the decoded characters could be reserved characters. In most
2046 * cases an encoded URI should be parsed into components using 2048 * cases an encoded URI should be parsed into components using
2047 * [Uri.parse] before decoding the separate components. 2049 * [Uri.parse] before decoding the separate components.
2048 */ 2050 */
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after
2234 static const int _DOT = 0x2E; 2236 static const int _DOT = 0x2E;
2235 static const int _SLASH = 0x2F; 2237 static const int _SLASH = 0x2F;
2236 static const int _ZERO = 0x30; 2238 static const int _ZERO = 0x30;
2237 static const int _NINE = 0x39; 2239 static const int _NINE = 0x39;
2238 static const int _COLON = 0x3A; 2240 static const int _COLON = 0x3A;
2239 static const int _LESS = 0x3C; 2241 static const int _LESS = 0x3C;
2240 static const int _GREATER = 0x3E; 2242 static const int _GREATER = 0x3E;
2241 static const int _QUESTION = 0x3F; 2243 static const int _QUESTION = 0x3F;
2242 static const int _AT_SIGN = 0x40; 2244 static const int _AT_SIGN = 0x40;
2243 static const int _UPPER_CASE_A = 0x41; 2245 static const int _UPPER_CASE_A = 0x41;
2246 static const int _UPPER_CASE_E = 0x45;
2244 static const int _UPPER_CASE_F = 0x46; 2247 static const int _UPPER_CASE_F = 0x46;
2245 static const int _UPPER_CASE_Z = 0x5A; 2248 static const int _UPPER_CASE_Z = 0x5A;
2246 static const int _LEFT_BRACKET = 0x5B; 2249 static const int _LEFT_BRACKET = 0x5B;
2247 static const int _BACKSLASH = 0x5C; 2250 static const int _BACKSLASH = 0x5C;
2248 static const int _RIGHT_BRACKET = 0x5D; 2251 static const int _RIGHT_BRACKET = 0x5D;
2249 static const int _LOWER_CASE_A = 0x61; 2252 static const int _LOWER_CASE_A = 0x61;
2250 static const int _LOWER_CASE_F = 0x66; 2253 static const int _LOWER_CASE_F = 0x66;
2251 static const int _LOWER_CASE_Z = 0x7A; 2254 static const int _LOWER_CASE_Z = 0x7A;
2252 static const int _BAR = 0x7C; 2255 static const int _BAR = 0x7C;
2253 2256
2257 static const String _hexDigits = "0123456789ABCDEF";
2258
2254 /** 2259 /**
2255 * This is the internal implementation of JavaScript's encodeURI function. 2260 * This is the internal implementation of JavaScript's encodeURI function.
2256 * It encodes all characters in the string [text] except for those 2261 * It encodes all characters in the string [text] except for those
2257 * that appear in [canonicalTable], and returns the escaped string. 2262 * that appear in [canonicalTable], and returns the escaped string.
2258 */ 2263 */
2259 static String _uriEncode(List<int> canonicalTable, 2264 static String _uriEncode(List<int> canonicalTable,
2260 String text, 2265 String text,
2261 {Encoding encoding: UTF8, 2266 Encoding encoding,
2262 bool spaceToPlus: false}) { 2267 bool spaceToPlus) {
2263 byteToHex(byte, buffer) { 2268 // Use a specialized encoder for known Unicode-compatible encodings.
2264 const String hex = '0123456789ABCDEF'; 2269 // This avoids encoding the string first and then working on the bytes,
2265 buffer.writeCharCode(hex.codeUnitAt(byte >> 4)); 2270 // and instead works directly on the code units of the string.
2266 buffer.writeCharCode(hex.codeUnitAt(byte & 0x0f)); 2271 if (identical(encoding, UTF8)) {
2272 return _uriEncodeUtf8(canonicalTable, text, spaceToPlus);
2273 }
2274 if (identical(encoding, LATIN1)) {
2275 return _uriEncodeSubset(canonicalTable, text, 255, spaceToPlus);
2276 }
2277 if (identical(encoding, ASCII)) {
2278 return _uriEncodeSubset(canonicalTable, text, 127, spaceToPlus);
2267 } 2279 }
2268 2280
2269 // Encode the string into bytes then generate an ASCII only string 2281 // Encode the string into bytes then generate an ASCII only string
2270 // by percent encoding selected bytes. 2282 // by percent encoding selected bytes.
2283 var bytes = encoding.encode(text);
2284 int i = 0;
2285 noChange: {
2286 while (i < bytes.length) {
2287 int byte = bytes[i];
2288 if (byte < 128 &&
2289 ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) {
2290 break noChange;
2291 }
2292 i++;
2293 }
2294 return text;
2295 }
2271 StringBuffer result = new StringBuffer(); 2296 StringBuffer result = new StringBuffer();
2272 var bytes = encoding.encode(text); 2297 for (int j = 0; j < i; j++) {
2273 for (int i = 0; i < bytes.length; i++) { 2298 result[j] = text.codeUnitAt(j);
sra1 2015/11/03 17:28:42 StringBuffer does not have []=.
Lasse Reichstein Nielsen 2015/11/03 18:04:08 Duh, should be writeCharCode. Obviously needs mor
2274 int byte = bytes[i]; 2299 }
2275 if (byte < 128 && 2300 while (true) {
2276 ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) { 2301 if (spaceToPlus && char == _SPACE) {
2277 result.writeCharCode(byte);
2278 } else if (spaceToPlus && byte == _SPACE) {
2279 result.writeCharCode(_PLUS); 2302 result.writeCharCode(_PLUS);
2280 } else { 2303 } else {
2281 result.writeCharCode(_PERCENT); 2304 result..writeCharCode(_PERCENT)
2282 byteToHex(byte, result); 2305 ..writeCharCode(_hexDigits.codeUnitAt(char >> 4))
2306 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
2307 }
2308 noChange: { // See dartbug.com/21481
2309 while (++i < text.length) {
2310 char = text.codeUnitAt(i);
2311 if (char < 128 &&
2312 (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {
2313 result.writeCharCode(char);
2314 } else {
2315 break noChange;
2316 }
2317 }
2318 break;
2283 } 2319 }
2284 } 2320 }
2285 return result.toString(); 2321 return result.toString();
2322 }
2323
2324 /**
2325 * Encodes a text where the encoding is a subset of Unicode.
2326 *
2327 * The subsets are either Latin-1 or US-ASCII, and they are distinguished
2328 * by the [limit] parameter which is the maximal code point allowed
2329 * by the encoding.
2330 */
2331 static String _uriEncodeSubset(List<int> canonicalTable, String text,
2332 int limit, bool spaceToPlus) {
2333 assert(limit == 127 || limit == 255);
2334 int i = 0;
2335 int char;
2336 noChange: { // See dartbug.com/21481
2337 while (i < text.length) {
2338 char = text.codeUnitAt(i);
2339 if (char >= 128 ||
2340 (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) {
2341 break noChange;
2342 }
2343 i++;
2344 }
2345 return text;
2346 }
2347 StringBuffer result = new StringBuffer();
2348 for (int j = 0; j < i; j++) {
2349 result.writeCharCode(text.codeUnitAt(j));
2350 }
2351 while (true) {
2352 if (char <= limit) {
2353 if (spaceToPlus && char == _SPACE) {
2354 result.writeCharCode(_PLUS);
2355 } else {
2356 result..writeCharCode(_PERCENT)
2357 ..writeCharCode(_hexDigits.codeUnitAt(char >> 4))
2358 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
2359 }
2360 } else {
2361 if (limit == 255) {
2362 throw new ArgumentError.value(
2363 text, "Source contains non-Latin-1 characters.");
2364 }
2365 throw new ArgumentError.value(
2366 text, "Source contains non-ASCII bytes.");
2367 }
2368 noChange: { // See dartbug.com/21481
2369 while (++i < text.length) {
2370 char = text.codeUnitAt(i);
2371 if (char < 128 &&
2372 (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {
2373 result.writeCharCode(char);
2374 } else {
2375 break noChange;
2376 }
2377 }
2378 break;
2379 }
2380 }
2381 return result.toString();
2382 }
2383
2384 static String _uriEncodeUtf8(List<int> canonicalTable, String text,
2385 bool spaceToPlus) {
2386 int i = 0;
2387 int char;
2388 noChange: { // See dartbug.com/21481
2389 while (i < text.length) {
2390 char = text.codeUnitAt(i);
2391 if (char >= 128 ||
2392 (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) {
2393 break noChange;
2394 }
2395 i++;
2396 }
2397 return text;
2398 }
2399 StringBuffer result = new StringBuffer();
2400 for (int j = 0; j < i; j++) {
2401 result.writeCharCode(text.codeUnitAt(j));
2402 }
2403 while (true) {
2404 if (char < 128) {
2405 if (spaceToPlus && char == _SPACE) {
2406 result.writeCharCode(_PLUS);
2407 } else {
2408 result..writeCharCode(_PERCENT)
2409 ..writeCharCode(_ZERO + (char >> 4)) // Range 0-7.
2410 ..writeCharCode(_hexDigits.codeUnitAt(char & 0x0F));
2411 }
2412 } else if (char < 0x800) {
2413 result..writeCharCode(_PERCENT)
2414 ..writeCharCode(_hexDigits.codeUnitAt(0xC + (char >> 10)))
2415 ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))
2416 ..writeCharCode(_PERCENT)
2417 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))
2418 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
2419 } else {
2420 assert(char < 0x10000); // UTF-16 code unit.
2421 int next;
2422 if (char & 0xFC00 != 0xD800 ||
2423 i + 1 == text.length ||
2424 (next = text.codeUnitAt(i + 1)) & 0xFC00 != 0xDC00) {
2425 result..writeCharCode(_PERCENT)
2426 ..writeCharCode(_UPPER_CASE_E)
2427 ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF))
2428 ..writeCharCode(_PERCENT)
2429 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3)))
2430 ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))
2431 ..writeCharCode(_PERCENT)
2432 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))
2433 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
2434 } else {
2435 // Lead surrogate followed by tail surrogate.
2436 char = 0x10000 + (((char & 0x3FF) << 10) | (next & 0x3FF));
2437 i++;
2438 result..writeCharCode(_PERCENT)
2439 ..writeCharCode(_UPPER_CASE_F)
2440 ..writeCharCode(_ZERO + (char >> 18)) // Range 0..7
2441 ..writeCharCode(_PERCENT)
2442 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 16) & 3)))
2443 ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF))
2444 ..writeCharCode(_PERCENT)
2445 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3)))
2446 ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))
2447 ..writeCharCode(_PERCENT)
2448 ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))
2449 ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
2450 }
2451 }
2452 noChange: { // See dartbug.com/21481
2453 while (++i < text.length) {
2454 char = text.codeUnitAt(i);
2455 if (char < 128 &&
2456 (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {
2457 result.writeCharCode(char);
2458 } else {
2459 break noChange;
2460 }
2461 }
2462 // Return result.toString(), but move the return to the end of the
2463 // function to appease analysis.
2464 break;
2465 }
2466 }
2467 return result.toString();
2286 } 2468 }
2287 2469
2288 /** 2470 /**
2289 * Convert a byte (2 character hex sequence) in string [s] starting 2471 * Convert a byte (2 character hex sequence) in string [s] starting
2290 * at position [pos] to its ordinal value 2472 * at position [pos] to its ordinal value
2291 */ 2473 */
2292 static int _hexCharPairToByte(String s, int pos) { 2474 static int _hexCharPairToByte(String s, int pos) {
2293 int byte = 0; 2475 int byte = 0;
2294 for (int i = 0; i < 2; i++) { 2476 for (int i = 0; i < 2; i++) {
2295 var charCode = s.codeUnitAt(pos + i); 2477 var charCode = s.codeUnitAt(pos + i);
(...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after
2604 0xafff, // 0x30 - 0x3f 1111111111110101 2786 0xafff, // 0x30 - 0x3f 1111111111110101
2605 // @ABCDEFGHIJKLMNO 2787 // @ABCDEFGHIJKLMNO
2606 0xffff, // 0x40 - 0x4f 1111111111111111 2788 0xffff, // 0x40 - 0x4f 1111111111111111
2607 // PQRSTUVWXYZ _ 2789 // PQRSTUVWXYZ _
2608 0x87ff, // 0x50 - 0x5f 1111111111100001 2790 0x87ff, // 0x50 - 0x5f 1111111111100001
2609 // abcdefghijklmno 2791 // abcdefghijklmno
2610 0xfffe, // 0x60 - 0x6f 0111111111111111 2792 0xfffe, // 0x60 - 0x6f 0111111111111111
2611 // pqrstuvwxyz ~ 2793 // pqrstuvwxyz ~
2612 0x47ff]; // 0x70 - 0x7f 1111111111100010 2794 0x47ff]; // 0x70 - 0x7f 1111111111100010
2613 } 2795 }
OLDNEW
« no previous file with comments | « sdk/lib/core/core.dart ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698