 Chromium Code Reviews
 Chromium Code Reviews Issue 1409053007:
  More optimization of _uriEncode.  (Closed) 
  Base URL: https://github.com/dart-lang/sdk.git@master
    
  
    Issue 1409053007:
  More optimization of _uriEncode.  (Closed) 
  Base URL: https://github.com/dart-lang/sdk.git@master
| Index: sdk/lib/core/uri.dart | 
| diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart | 
| index 10aa827cb190c6e93a8889cad67312629145d5f5..bd4d1a1e9e3e4dd81168bec83eb937966a41e110 100644 | 
| --- a/sdk/lib/core/uri.dart | 
| +++ b/sdk/lib/core/uri.dart | 
| @@ -1242,7 +1242,8 @@ class Uri { | 
| if (path != null) { | 
| result = _normalize(path, start, end, _pathCharOrSlashTable); | 
| } else { | 
| - result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/"); | 
| + result = pathSegments.map((s) => | 
| + _uriEncodeUtf8(_pathCharTable, s, false)).join("/"); | 
| } | 
| if (result.isEmpty) { | 
| if (isFile) return "/"; | 
| @@ -1952,7 +1953,7 @@ class Uri { | 
| * a [Uri]. | 
| */ | 
| static String encodeComponent(String component) { | 
| - return _uriEncode(_unreserved2396Table, component); | 
| + return _uriEncodeUtf8(_unreserved2396Table, component, false); | 
| } | 
| /** | 
| @@ -1990,8 +1991,9 @@ class Uri { | 
| */ | 
| static String encodeQueryComponent(String component, | 
| {Encoding encoding: UTF8}) { | 
| + const int spaceToPlus = true; | 
| 
sra1
2015/11/03 17:28:42
checked mode error [`spaceToPlus` is declared `const int` but is initialized with the bool `true`]
 | 
| return _uriEncode( | 
| - _unreservedTable, component, encoding: encoding, spaceToPlus: true); | 
| + _unreservedTable, component, encoding, spaceToPlus); | 
| } | 
| /** | 
| @@ -2035,7 +2037,7 @@ class Uri { | 
| * the encodeURI function . | 
| */ | 
| static String encodeFull(String uri) { | 
| - return _uriEncode(_encodeFullTable, uri); | 
| + return _uriEncodeUtf8(_encodeFullTable, uri, false); | 
| } | 
| /** | 
| @@ -2241,6 +2243,7 @@ class Uri { | 
| static const int _QUESTION = 0x3F; | 
| static const int _AT_SIGN = 0x40; | 
| static const int _UPPER_CASE_A = 0x41; | 
| + static const int _UPPER_CASE_E = 0x45; | 
| static const int _UPPER_CASE_F = 0x46; | 
| static const int _UPPER_CASE_Z = 0x5A; | 
| static const int _LEFT_BRACKET = 0x5B; | 
| @@ -2251,6 +2254,8 @@ class Uri { | 
| static const int _LOWER_CASE_Z = 0x7A; | 
| static const int _BAR = 0x7C; | 
| + static const String _hexDigits = "0123456789ABCDEF"; | 
| + | 
| /** | 
| * This is the internal implementation of JavaScript's encodeURI function. | 
| * It encodes all characters in the string [text] except for those | 
| @@ -2258,28 +2263,205 @@ class Uri { | 
| */ | 
| static String _uriEncode(List<int> canonicalTable, | 
| String text, | 
| - {Encoding encoding: UTF8, | 
| - bool spaceToPlus: false}) { | 
| - byteToHex(byte, buffer) { | 
| - const String hex = '0123456789ABCDEF'; | 
| - buffer.writeCharCode(hex.codeUnitAt(byte >> 4)); | 
| - buffer.writeCharCode(hex.codeUnitAt(byte & 0x0f)); | 
| + Encoding encoding, | 
| + bool spaceToPlus) { | 
| + // Use a specialized encoder for known Unicode-compatible encodings. | 
| + // This avoids encoding the string first and then working on the bytes, | 
| + // and instead works directly on the code units of the string. | 
| + if (identical(encoding, UTF8)) { | 
| + return _uriEncodeUtf8(canonicalTable, text, spaceToPlus); | 
| + } | 
| + if (identical(encoding, LATIN1)) { | 
| + return _uriEncodeSubset(canonicalTable, text, 255, spaceToPlus); | 
| + } | 
| + if (identical(encoding, ASCII)) { | 
| + return _uriEncodeSubset(canonicalTable, text, 127, spaceToPlus); | 
| } | 
| // Encode the string into bytes then generate an ASCII only string | 
| // by percent encoding selected bytes. | 
| - StringBuffer result = new StringBuffer(); | 
| var bytes = encoding.encode(text); | 
| - for (int i = 0; i < bytes.length; i++) { | 
| - int byte = bytes[i]; | 
| - if (byte < 128 && | 
| - ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) { | 
| - result.writeCharCode(byte); | 
| - } else if (spaceToPlus && byte == _SPACE) { | 
| + int i = 0; | 
| + noChange: { | 
| + while (i < bytes.length) { | 
| + int byte = bytes[i]; | 
| + if (byte < 128 && | 
| + ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) { | 
| + break noChange; | 
| + } | 
| + i++; | 
| + } | 
| + return text; | 
| + } | 
| + StringBuffer result = new StringBuffer(); | 
| + for (int j = 0; j < i; j++) { | 
| + result[j] = text.codeUnitAt(j); | 
| 
sra1
2015/11/03 17:28:42
StringBuffer does not have []=.
 
Lasse Reichstein Nielsen
2015/11/03 18:04:08
Duh, should be writeCharCode. 
Obviously needs more… [comment truncated in source]
 | 
| + } | 
| + while (true) { | 
| + if (spaceToPlus && char == _SPACE) { | 
| result.writeCharCode(_PLUS); | 
| } else { | 
| - result.writeCharCode(_PERCENT); | 
| - byteToHex(byte, result); | 
| + result..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(char >> 4)) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | 
| + } | 
| + noChange: { // See dartbug.com/21481 | 
| + while (++i < text.length) { | 
| + char = text.codeUnitAt(i); | 
| + if (char < 128 && | 
| + (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) { | 
| + result.writeCharCode(char); | 
| + } else { | 
| + break noChange; | 
| + } | 
| + } | 
| + break; | 
| + } | 
| + } | 
| + return result.toString(); | 
| + } | 
| + | 
| + /** | 
| + * Encodes a text where the encoding is a subset of Unicode. | 
| + * | 
| + * The subsets are either Latin-1 or US-ASCII, and they are distinguished | 
| + * by the [limit] parameter which is the maximal code point allowed | 
| + * by the encoding. | 
| + */ | 
| + static String _uriEncodeSubset(List<int> canonicalTable, String text, | 
| + int limit, bool spaceToPlus) { | 
| + assert(limit == 127 || limit == 255); | 
| + int i = 0; | 
| + int char; | 
| + noChange: { // See dartbug.com/21481 | 
| + while (i < text.length) { | 
| + char = text.codeUnitAt(i); | 
| + if (char >= 128 || | 
| + (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) { | 
| + break noChange; | 
| + } | 
| + i++; | 
| + } | 
| + return text; | 
| + } | 
| + StringBuffer result = new StringBuffer(); | 
| + for (int j = 0; j < i; j++) { | 
| + result.writeCharCode(text.codeUnitAt(j)); | 
| + } | 
| + while (true) { | 
| + if (char <= limit) { | 
| + if (spaceToPlus && char == _SPACE) { | 
| + result.writeCharCode(_PLUS); | 
| + } else { | 
| + result..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(char >> 4)) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | 
| + } | 
| + } else { | 
| + if (limit == 255) { | 
| + throw new ArgumentError.value( | 
| + text, "Source contains non-Latin-1 characters."); | 
| + } | 
| + throw new ArgumentError.value( | 
| + text, "Source contains non-ASCII bytes."); | 
| + } | 
| + noChange: { // See dartbug.com/21481 | 
| + while (++i < text.length) { | 
| + char = text.codeUnitAt(i); | 
| + if (char < 128 && | 
| + (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) { | 
| + result.writeCharCode(char); | 
| + } else { | 
| + break noChange; | 
| + } | 
| + } | 
| + break; | 
| + } | 
| + } | 
| + return result.toString(); | 
| + } | 
| + | 
| + static String _uriEncodeUtf8(List<int> canonicalTable, String text, | 
| + bool spaceToPlus) { | 
| + int i = 0; | 
| + int char; | 
| + noChange: { // See dartbug.com/21481 | 
| + while (i < text.length) { | 
| + char = text.codeUnitAt(i); | 
| + if (char >= 128 || | 
| + (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) { | 
| + break noChange; | 
| + } | 
| + i++; | 
| + } | 
| + return text; | 
| + } | 
| + StringBuffer result = new StringBuffer(); | 
| + for (int j = 0; j < i; j++) { | 
| + result.writeCharCode(text.codeUnitAt(j)); | 
| + } | 
| + while (true) { | 
| + if (char < 128) { | 
| + if (spaceToPlus && char == _SPACE) { | 
| + result.writeCharCode(_PLUS); | 
| + } else { | 
| + result..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_ZERO + (char >> 4)) // Range 0-7. | 
| + ..writeCharCode(_hexDigits.codeUnitAt(char & 0x0F)); | 
| + } | 
| + } else if (char < 0x800) { | 
| + result..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(0xC + (char >> 10))) | 
| + ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF)) | 
| + ..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3))) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | 
| + } else { | 
| + assert(char < 0x10000); // UTF-16 code unit. | 
| + int next; | 
| + if (char & 0xFC00 != 0xD800 || | 
| + i + 1 == text.length || | 
| + (next = text.codeUnitAt(i + 1)) & 0xFC00 != 0xDC00) { | 
| + result..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_UPPER_CASE_E) | 
| + ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF)) | 
| + ..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3))) | 
| + ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF)) | 
| + ..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3))) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | 
| + } else { | 
| + // Lead surrogate followed by tail surrogate. | 
| + char = 0x10000 + (((char & 0x3FF) << 10) | (next & 0x3FF)); | 
| + i++; | 
| + result..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_UPPER_CASE_F) | 
| + ..writeCharCode(_ZERO + (char >> 18)) // Range 0..7 | 
| + ..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 16) & 3))) | 
| + ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF)) | 
| + ..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3))) | 
| + ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF)) | 
| + ..writeCharCode(_PERCENT) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3))) | 
| + ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF)); | 
| + } | 
| + } | 
| + noChange: { // See dartbug.com/21481 | 
| + while (++i < text.length) { | 
| + char = text.codeUnitAt(i); | 
| + if (char < 128 && | 
| + (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) { | 
| + result.writeCharCode(char); | 
| + } else { | 
| + break noChange; | 
| + } | 
| + } | 
| + // Return result.toString(), but move the return to the end of the | 
| + // function to appease analysis. | 
| + break; | 
| } | 
| } | 
| return result.toString(); |