Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(77)

Unified Diff: sdk/lib/core/uri.dart

Issue 1409053007: More optimization of _uriEncode. (Closed) Base URL: https://github.com/dart-lang/sdk.git@master
Patch Set: Call directly to specialized UTF-8 version. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « sdk/lib/core/core.dart ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: sdk/lib/core/uri.dart
diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart
index 10aa827cb190c6e93a8889cad67312629145d5f5..bd4d1a1e9e3e4dd81168bec83eb937966a41e110 100644
--- a/sdk/lib/core/uri.dart
+++ b/sdk/lib/core/uri.dart
@@ -1242,7 +1242,8 @@ class Uri {
if (path != null) {
result = _normalize(path, start, end, _pathCharOrSlashTable);
} else {
- result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/");
+ result = pathSegments.map((s) =>
+ _uriEncodeUtf8(_pathCharTable, s, false)).join("/");
}
if (result.isEmpty) {
if (isFile) return "/";
@@ -1952,7 +1953,7 @@ class Uri {
* a [Uri].
*/
static String encodeComponent(String component) {
- return _uriEncode(_unreserved2396Table, component);
+ return _uriEncodeUtf8(_unreserved2396Table, component, false);
}
/**
@@ -1990,8 +1991,9 @@ class Uri {
*/
static String encodeQueryComponent(String component,
{Encoding encoding: UTF8}) {
+ const int spaceToPlus = true;
sra1 2015/11/03 17:28:42 checked mode error
return _uriEncode(
- _unreservedTable, component, encoding: encoding, spaceToPlus: true);
+ _unreservedTable, component, encoding, spaceToPlus);
}
/**
@@ -2035,7 +2037,7 @@ class Uri {
* the encodeURI function .
*/
static String encodeFull(String uri) {
- return _uriEncode(_encodeFullTable, uri);
+ return _uriEncodeUtf8(_encodeFullTable, uri, false);
}
/**
@@ -2241,6 +2243,7 @@ class Uri {
static const int _QUESTION = 0x3F;
static const int _AT_SIGN = 0x40;
static const int _UPPER_CASE_A = 0x41;
+ static const int _UPPER_CASE_E = 0x45;
static const int _UPPER_CASE_F = 0x46;
static const int _UPPER_CASE_Z = 0x5A;
static const int _LEFT_BRACKET = 0x5B;
@@ -2251,6 +2254,8 @@ class Uri {
static const int _LOWER_CASE_Z = 0x7A;
static const int _BAR = 0x7C;
+ static const String _hexDigits = "0123456789ABCDEF";
+
/**
* This is the internal implementation of JavaScript's encodeURI function.
* It encodes all characters in the string [text] except for those
@@ -2258,28 +2263,205 @@ class Uri {
*/
static String _uriEncode(List<int> canonicalTable,
String text,
- {Encoding encoding: UTF8,
- bool spaceToPlus: false}) {
- byteToHex(byte, buffer) {
- const String hex = '0123456789ABCDEF';
- buffer.writeCharCode(hex.codeUnitAt(byte >> 4));
- buffer.writeCharCode(hex.codeUnitAt(byte & 0x0f));
+ Encoding encoding,
+ bool spaceToPlus) {
+ // Use a specialized encoder for known Unicode-compatible encodings.
+ // This avoids encoding the string first and then working on the bytes,
+ // and instead works directly on the code units of the string.
+ if (identical(encoding, UTF8)) {
+ return _uriEncodeUtf8(canonicalTable, text, spaceToPlus);
+ }
+ if (identical(encoding, LATIN1)) {
+ return _uriEncodeSubset(canonicalTable, text, 255, spaceToPlus);
+ }
+ if (identical(encoding, ASCII)) {
+ return _uriEncodeSubset(canonicalTable, text, 127, spaceToPlus);
}
// Encode the string into bytes then generate an ASCII only string
// by percent encoding selected bytes.
- StringBuffer result = new StringBuffer();
var bytes = encoding.encode(text);
- for (int i = 0; i < bytes.length; i++) {
- int byte = bytes[i];
- if (byte < 128 &&
- ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) {
- result.writeCharCode(byte);
- } else if (spaceToPlus && byte == _SPACE) {
+ int i = 0;
+ noChange: {
+ while (i < bytes.length) {
+ int byte = bytes[i];
+ if (byte < 128 &&
+ ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) {
+ break noChange;
+ }
+ i++;
+ }
+ return text;
+ }
+ StringBuffer result = new StringBuffer();
+ for (int j = 0; j < i; j++) {
+ result[j] = text.codeUnitAt(j);
sra1 2015/11/03 17:28:42 StringBuffer does not have []=.
Lasse Reichstein Nielsen 2015/11/03 18:04:08 Duh, should be writeCharCode. Obviously needs more… [remainder of comment truncated in the collapsed view]
+ }
+ while (true) {
+ if (spaceToPlus && char == _SPACE) {
result.writeCharCode(_PLUS);
} else {
- result.writeCharCode(_PERCENT);
- byteToHex(byte, result);
+ result..writeCharCode(_PERCENT)
+ ..writeCharCode(_hexDigits.codeUnitAt(char >> 4))
+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
+ }
+ noChange: { // See dartbug.com/21481
+ while (++i < text.length) {
+ char = text.codeUnitAt(i);
+ if (char < 128 &&
+ (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {
+ result.writeCharCode(char);
+ } else {
+ break noChange;
+ }
+ }
+ break;
+ }
+ }
+ return result.toString();
+ }
+
+  /**
+   * Percent-encodes [text] for an encoding that is a subset of Unicode.
+   *
+   * The subsets are either Latin-1 or US-ASCII, and they are distinguished
+   * by the [limit] parameter, which is the maximal code point allowed
+   * by the encoding (255 or 127 respectively).
+   *
+   * Code units below 128 whose bit is set in [canonicalTable] are copied
+   * unchanged. Every other code unit up to [limit] is written as `%XX`,
+   * except that a space becomes `+` when [spaceToPlus] is true.
+   * If no code unit needs escaping, [text] itself is returned.
+   *
+   * Throws an [ArgumentError] if [text] contains a code unit above [limit].
+   */
+  static String _uriEncodeSubset(List<int> canonicalTable, String text,
+                                 int limit, bool spaceToPlus) {
+    assert(limit == 127 || limit == 255);
+    int i = 0;
+    int char;
+    // Scan for the first code unit that must be escaped. If there is none,
+    // the input is returned as-is without allocating a buffer.
+    noChange: {  // See dartbug.com/21481
+      while (i < text.length) {
+        char = text.codeUnitAt(i);
+        if (char >= 128 ||
+            (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) {
+          break noChange;
+        }
+        i++;
+      }
+      return text;
+    }
+    // Copy the prefix that needed no escaping.
+    StringBuffer result = new StringBuffer();
+    for (int j = 0; j < i; j++) {
+      result.writeCharCode(text.codeUnitAt(j));
+    }
+    // Invariant at the top of each iteration: `char` is the code unit at
+    // index `i`, and it is one that cannot be copied verbatim.
+    while (true) {
+      if (char <= limit) {
+        if (spaceToPlus && char == _SPACE) {
+          result.writeCharCode(_PLUS);
+        } else {
+          result..writeCharCode(_PERCENT)
+                ..writeCharCode(_hexDigits.codeUnitAt(char >> 4))
+                ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
+        }
+      } else {
+        // ArgumentError.value takes (value, name, message); the message
+        // must be the third argument, otherwise it is reported as the
+        // argument's name.
+        if (limit == 255) {
+          throw new ArgumentError.value(
+              text, "text", "Source contains non-Latin-1 characters.");
+        }
+        throw new ArgumentError.value(
+            text, "text", "Source contains non-ASCII bytes.");
+      }
+      // Copy the following run of code units that need no escaping, then
+      // either loop again for the next one that does or finish at the end.
+      noChange: {  // See dartbug.com/21481
+        while (++i < text.length) {
+          char = text.codeUnitAt(i);
+          if (char < 128 &&
+              (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {
+            result.writeCharCode(char);
+          } else {
+            break noChange;
+          }
+        }
+        break;
+      }
+    }
+    return result.toString();
+  }
+
+ static String _uriEncodeUtf8(List<int> canonicalTable, String text,
+ bool spaceToPlus) {
+ int i = 0;
+ int char;
+ noChange: { // See dartbug.com/21481
+ while (i < text.length) {
+ char = text.codeUnitAt(i);
+ if (char >= 128 ||
+ (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) {
+ break noChange;
+ }
+ i++;
+ }
+ return text;
+ }
+ StringBuffer result = new StringBuffer();
+ for (int j = 0; j < i; j++) {
+ result.writeCharCode(text.codeUnitAt(j));
+ }
+ while (true) {
+ if (char < 128) {
+ if (spaceToPlus && char == _SPACE) {
+ result.writeCharCode(_PLUS);
+ } else {
+ result..writeCharCode(_PERCENT)
+ ..writeCharCode(_ZERO + (char >> 4)) // Range 0-7.
+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0x0F));
+ }
+ } else if (char < 0x800) {
+ result..writeCharCode(_PERCENT)
+ ..writeCharCode(_hexDigits.codeUnitAt(0xC + (char >> 10)))
+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))
+ ..writeCharCode(_PERCENT)
+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))
+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
+ } else {
+ assert(char < 0x10000); // UTF-16 code unit.
+ int next;
+ if (char & 0xFC00 != 0xD800 ||
+ i + 1 == text.length ||
+ (next = text.codeUnitAt(i + 1)) & 0xFC00 != 0xDC00) {
+ result..writeCharCode(_PERCENT)
+ ..writeCharCode(_UPPER_CASE_E)
+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF))
+ ..writeCharCode(_PERCENT)
+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3)))
+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))
+ ..writeCharCode(_PERCENT)
+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))
+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
+ } else {
+ // Lead surrogate followed by tail surrogate.
+ char = 0x10000 + (((char & 0x3FF) << 10) | (next & 0x3FF));
+ i++;
+ result..writeCharCode(_PERCENT)
+ ..writeCharCode(_UPPER_CASE_F)
+ ..writeCharCode(_ZERO + (char >> 18)) // Range 0..7
+ ..writeCharCode(_PERCENT)
+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 16) & 3)))
+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF))
+ ..writeCharCode(_PERCENT)
+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3)))
+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))
+ ..writeCharCode(_PERCENT)
+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))
+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));
+ }
+ }
+ noChange: { // See dartbug.com/21481
+ while (++i < text.length) {
+ char = text.codeUnitAt(i);
+ if (char < 128 &&
+ (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {
+ result.writeCharCode(char);
+ } else {
+ break noChange;
+ }
+ }
+ // Return result.toString(), but move the return to the end of the
+ // function to appease analysis.
+ break;
}
}
return result.toString();
« no previous file with comments | « sdk/lib/core/core.dart ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698