sdk/lib/core/uri.dart - Issue 1409053007: More optimization of _uriEncode.

Unified Diff: sdk/lib/core/uri.dart

Issue 1409053007: More optimization of _uriEncode. (Closed) Base URL: https://github.com/dart-lang/sdk.git@master

Patch Set: Call directly to specialized UTF-8 version. Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: sdk/lib/core/uri.dart

diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart

index 10aa827cb190c6e93a8889cad67312629145d5f5..bd4d1a1e9e3e4dd81168bec83eb937966a41e110 100644

--- a/sdk/lib/core/uri.dart

+++ b/sdk/lib/core/uri.dart

@@ -1242,7 +1242,8 @@ class Uri {

if (path != null) {

result = _normalize(path, start, end, _pathCharOrSlashTable);

} else {

- result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/");

+ result = pathSegments.map((s) =>

+ _uriEncodeUtf8(_pathCharTable, s, false)).join("/");

}

if (result.isEmpty) {

if (isFile) return "/";

@@ -1952,7 +1953,7 @@ class Uri {

* a [Uri].

static String encodeComponent(String component) {

- return _uriEncode(_unreserved2396Table, component);

+ return _uriEncodeUtf8(_unreserved2396Table, component, false);

}

/**

@@ -1990,8 +1991,9 @@ class Uri {

static String encodeQueryComponent(String component,

{Encoding encoding: UTF8}) {

+ const int spaceToPlus = true;

sra1 2015/11/03 17:28:42 Checked-mode error: `spaceToPlus` is declared as `int` but initialized with the bool `true`.

return _uriEncode(

- _unreservedTable, component, encoding: encoding, spaceToPlus: true);

+ _unreservedTable, component, encoding, spaceToPlus);

}

/**

@@ -2035,7 +2037,7 @@ class Uri {

* the encodeURI function.

static String encodeFull(String uri) {

- return _uriEncode(_encodeFullTable, uri);

+ return _uriEncodeUtf8(_encodeFullTable, uri, false);

}

/**

@@ -2241,6 +2243,7 @@ class Uri {

static const int _QUESTION = 0x3F;

static const int _AT_SIGN = 0x40;

static const int _UPPER_CASE_A = 0x41;

+ static const int _UPPER_CASE_E = 0x45;

static const int _UPPER_CASE_F = 0x46;

static const int _UPPER_CASE_Z = 0x5A;

static const int _LEFT_BRACKET = 0x5B;

@@ -2251,6 +2254,8 @@ class Uri {

static const int _LOWER_CASE_Z = 0x7A;

static const int _BAR = 0x7C;

+ static const String _hexDigits = "0123456789ABCDEF";

/**

* This is the internal implementation of JavaScript's encodeURI function.

* It encodes all characters in the string [text] except for those

@@ -2258,28 +2263,205 @@ class Uri {

static String _uriEncode(List<int> canonicalTable,

String text,

- {Encoding encoding: UTF8,

- bool spaceToPlus: false}) {

- byteToHex(byte, buffer) {

- const String hex = '0123456789ABCDEF';

- buffer.writeCharCode(hex.codeUnitAt(byte >> 4));

- buffer.writeCharCode(hex.codeUnitAt(byte & 0x0f));

+ Encoding encoding,

+ bool spaceToPlus) {

+ // Use a specialized encoder for known Unicode-compatible encodings.

+ // This avoids encoding the string first and then working on the bytes,

+ // and instead works directly on the code units of the string.

+ if (identical(encoding, UTF8)) {

+ return _uriEncodeUtf8(canonicalTable, text, spaceToPlus);

+ }

+ if (identical(encoding, LATIN1)) {

+ return _uriEncodeSubset(canonicalTable, text, 255, spaceToPlus);

+ }

+ if (identical(encoding, ASCII)) {

+ return _uriEncodeSubset(canonicalTable, text, 127, spaceToPlus);

}

// Encode the string into bytes then generate an ASCII only string

// by percent encoding selected bytes.

- StringBuffer result = new StringBuffer();

var bytes = encoding.encode(text);

- for (int i = 0; i < bytes.length; i++) {

- int byte = bytes[i];

- if (byte < 128 &&

- ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) {

- result.writeCharCode(byte);

- } else if (spaceToPlus && byte == _SPACE) {

+ int i = 0;

+ noChange: {

+ while (i < bytes.length) {

+ int byte = bytes[i];

+ if (byte < 128 &&

+ ((canonicalTable[byte >> 4] & (1 << (byte & 0x0f))) != 0)) {

+ break noChange;

+ }

+ i++;

+ }

+ return text;

+ }

+ StringBuffer result = new StringBuffer();

+ for (int j = 0; j < i; j++) {

+ result[j] = text.codeUnitAt(j);

sra1 2015/11/03 17:28:42 StringBuffer does not have []=.

Lasse Reichstein Nielsen 2015/11/03 18:04:08 Duh, should be writeCharCode. Obviously needs more…

+ }

+ while (true) {

+ if (spaceToPlus && char == _SPACE) {

result.writeCharCode(_PLUS);

} else {

- result.writeCharCode(_PERCENT);

- byteToHex(byte, result);

+ result..writeCharCode(_PERCENT)

+ ..writeCharCode(_hexDigits.codeUnitAt(char >> 4))

+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

+ }

+ noChange: { // See dartbug.com/21481

+ while (++i < text.length) {

+ char = text.codeUnitAt(i);

+ if (char < 128 &&

+ (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {

+ result.writeCharCode(char);

+ } else {

+ break noChange;

+ }

+ break;

+ }

+ return result.toString();

+ }

+ /**

+ * Encodes a text where the encoding is a subset of Unicode.

+ *

+ * The subsets are either Latin-1 or US-ASCII, and they are distinguished

+ * by the [limit] parameter which is the maximal code point allowed

+ * by the encoding.

+ */

+ static String _uriEncodeSubset(List<int> canonicalTable, String text,

+ int limit, bool spaceToPlus) {

+ assert(limit == 127 || limit == 255);

+ int i = 0;

+ int char;

+ noChange: { // See dartbug.com/21481

+ while (i < text.length) {

+ char = text.codeUnitAt(i);

+ if (char >= 128 ||

+ (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) {

+ break noChange;

+ }

+ i++;

+ }

+ return text;

+ }

+ StringBuffer result = new StringBuffer();

+ for (int j = 0; j < i; j++) {

+ result.writeCharCode(text.codeUnitAt(j));

+ }

+ while (true) {

+ if (char <= limit) {

+ if (spaceToPlus && char == _SPACE) {

+ result.writeCharCode(_PLUS);

+ } else {

+ result..writeCharCode(_PERCENT)

+ ..writeCharCode(_hexDigits.codeUnitAt(char >> 4))

+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

+ }

+ } else {

+ if (limit == 255) {

+ throw new ArgumentError.value(

+ text, "Source contains non-Latin-1 characters.");

+ }

+ throw new ArgumentError.value(

+ text, "Source contains non-ASCII bytes.");

+ }

+ noChange: { // See dartbug.com/21481

+ while (++i < text.length) {

+ char = text.codeUnitAt(i);

+ if (char < 128 &&

+ (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {

+ result.writeCharCode(char);

+ } else {

+ break noChange;

+ }

+ break;

+ }

+ return result.toString();

+ }

+ static String _uriEncodeUtf8(List<int> canonicalTable, String text,

+ bool spaceToPlus) {

+ int i = 0;

+ int char;

+ noChange: { // See dartbug.com/21481

+ while (i < text.length) {

+ char = text.codeUnitAt(i);

+ if (char >= 128 ||

+ (canonicalTable[char >> 4] & (1 << (char & 0xf)) == 0)) {

+ break noChange;

+ }

+ i++;

+ }

+ return text;

+ }

+ StringBuffer result = new StringBuffer();

+ for (int j = 0; j < i; j++) {

+ result.writeCharCode(text.codeUnitAt(j));

+ }

+ while (true) {

+ if (char < 128) {

+ if (spaceToPlus && char == _SPACE) {

+ result.writeCharCode(_PLUS);

+ } else {

+ result..writeCharCode(_PERCENT)

+ ..writeCharCode(_ZERO + (char >> 4)) // Range 0-7.

+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0x0F));

+ }

+ } else if (char < 0x800) {

+ result..writeCharCode(_PERCENT)

+ ..writeCharCode(_hexDigits.codeUnitAt(0xC + (char >> 10)))

+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))

+ ..writeCharCode(_PERCENT)

+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))

+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

+ } else {

+ assert(char < 0x10000); // UTF-16 code unit.

+ int next;

+ if (char & 0xFC00 != 0xD800 ||

+ i + 1 == text.length ||

+ (next = text.codeUnitAt(i + 1)) & 0xFC00 != 0xDC00) {

+ result..writeCharCode(_PERCENT)

+ ..writeCharCode(_UPPER_CASE_E)

+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF))

+ ..writeCharCode(_PERCENT)

+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3)))

+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))

+ ..writeCharCode(_PERCENT)

+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))

+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

+ } else {

+ // Lead surrogate followed by tail surrogate.

+ char = 0x10000 + (((char & 0x3FF) << 10) | (next & 0x3FF));

+ i++;

+ result..writeCharCode(_PERCENT)

+ ..writeCharCode(_UPPER_CASE_F)

+ ..writeCharCode(_ZERO + (char >> 18)) // Range 0..7

+ ..writeCharCode(_PERCENT)

+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 16) & 3)))

+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 12) & 0xF))

+ ..writeCharCode(_PERCENT)

+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 10) & 3)))

+ ..writeCharCode(_hexDigits.codeUnitAt((char >> 6) & 0xF))

+ ..writeCharCode(_PERCENT)

+ ..writeCharCode(_hexDigits.codeUnitAt(0x8 + ((char >> 4) & 3)))

+ ..writeCharCode(_hexDigits.codeUnitAt(char & 0xF));

+ }

+ noChange: { // See dartbug.com/21481

+ while (++i < text.length) {

+ char = text.codeUnitAt(i);

+ if (char < 128 &&

+ (canonicalTable[char >> 4] & (1 << (char & 0xf)) != 0)) {

+ result.writeCharCode(char);

+ } else {

+ break noChange;

+ }

+ // Return result.toString(), but move the return to the end of the

+ // function to appease analysis.

+ break;

}

return result.toString();

« no previous file with comments | « sdk/lib/core/core.dart ('k') | no next file » | no next file with comments »