runtime/lib/string_base.dart - Issue 11411092: Revert "Add some support for the code-point code-unit distinction."

Unified Diff: runtime/lib/string_base.dart

Issue 11411092: Revert "Add some support for the code-point code-unit distinction." (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: runtime/lib/string_base.dart

diff --git a/runtime/lib/string_base.dart b/runtime/lib/string_base.dart

index 8aa16df99c5a24b8e54e7e9bf529735fdc214896..22e17b9662c0098a0a15de1770e430b36c459db3 100644

--- a/runtime/lib/string_base.dart

+++ b/runtime/lib/string_base.dart

@@ -16,29 +16,29 @@ class _StringBase {

int get hashCode native "String_getHashCode";

/**

- * Create the most efficient string representation for the specified UTF-16

- * [codeUnits].

+ * Create the most efficient string representation for specified

+ * [codePoints].

- static String createFromUtf16(List<int> codeUnits) {

+ static String createFromCharCodes(List<int> charCodes) {

_ObjectArray objectArray;

- if (codeUnits is _ObjectArray) {

- objectArray = codeUnits;

+ if (charCodes is _ObjectArray) {

+ objectArray = charCodes;

} else {

- int len = codeUnits.length;

+ int len = charCodes.length;

objectArray = new _ObjectArray(len);

for (int i = 0; i < len; i++) {

- objectArray[i] = codeUnits[i];

+ objectArray[i] = charCodes[i];

}

- return _createFromUtf16(objectArray);

+ return _createFromCodePoints(objectArray);

}

- static String _createFromUtf16(List<int> codeUnits)

- native "StringBase_createFromUtf16";

+ static String _createFromCodePoints(List<int> codePoints)

+ native "StringBase_createFromCodePoints";

String operator [](int index) native "String_charAt";

- int codeUnitAt(int index) native "String_codeUnitAt";

+ int charCodeAt(int index) native "String_charCodeAt";

int get length native "String_getLength";

@@ -69,12 +69,12 @@ class _StringBase {

int otherLength = other.length;

int len = (thisLength < otherLength) ? thisLength : otherLength;

for (int i = 0; i < len; i++) {

- int thisCodeUnit = this.codeUnitAt(i);

- int otherCodeUnit = other.codeUnitAt(i);

- if (thisCodeUnit < otherCodeUnit) {

+ int thisCodePoint = this.charCodeAt(i);

+ int otherCodePoint = other.charCodeAt(i);

+ if (thisCodePoint < otherCodePoint) {

return -1;

}

- if (thisCodeUnit > otherCodeUnit) {

+ if (thisCodePoint > otherCodePoint) {

return 1;

}

@@ -93,7 +93,7 @@ class _StringBase {

return false;

}

for (int i = 0; i < len; i++) {

- if (this.codeUnitAt(i + start) != other.codeUnitAt(i)) {

+ if (this.charCodeAt(i + start) != other.charCodeAt(i)) {

return false;

}

@@ -162,9 +162,7 @@ class _StringBase {

final int len = this.length;

int first = 0;

for (; first < len; first++) {

- // There are no whitespace characters that are outside the BMP so we

- // can use code units here for efficiency.

- if (!_isWhitespace(this.codeUnitAt(first))) {

+ if (!_isWhitespace(this.charCodeAt(first))) {

break;

}

@@ -174,7 +172,7 @@ class _StringBase {

}

int last = len - 1;

for (; last >= first; last--) {

- if (!_isWhitespace(this.codeUnitAt(last))) {

+ if (!_isWhitespace(this.charCodeAt(last))) {

break;

}

@@ -295,35 +293,20 @@ class _StringBase {

return result;

}

- // TODO(erikcorry): Fix this to use the new code point iterator when it is

- // available.

List<String> splitChars() {

int len = this.length;

final result = new List<String>(len);

- bool supplementaryCharacterSeen = false;

- int i, j;

- for (i = j = 0; i < len; i++, j++) {

- int c = charCodeAt(i);

- // Check for non-basic plane character encoded as a UTF-16 surrogate pair.

- if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) {

- i++;

- supplementaryCharacterSeen = true;

- }

- result[j] = new String.fromCharCodes([c]);

+ for (int i = 0; i < len; i++) {

+ result[i] = this[i];

}

- if (!supplementaryCharacterSeen) return result;

- // If we saw some non-basic plane characters, then we have to return a

- // slightly smaller array than expected (we can't trim the original one

- // because it is non-extendable). This rarely happens so this is preferable

- // to having a separate pass over the string to count the code points.

- return result.getRange(0, j);

+ return result;

}

- List<int> get codeUnits {

+ List<int> get charCodes {

int len = this.length;

final result = new List<int>(len);

for (int i = 0; i < len; i++) {

- result[i] = this.codeUnitAt(i);

+ result[i] = this.charCodeAt(i);

}

return result;

}

@@ -377,91 +360,48 @@ class _OneByteString extends _StringBase implements String {

"_OneByteString can only be allocated by the VM");

}

+ // Checks for one-byte whitespaces only.

+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

+ // whitespaces for one byte strings.

bool _isWhitespace(int codePoint) {

return

- (codePoint == 32) || // Space.

- (codePoint == 0xa0) || // No-break space.

+ (codePoint == 32) || // Space.

((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

}

- int charCodeAt(int index) => codeUnitAt(index);

- List<int> get charCodes => codeUnits;

}

-class _TwoByteStringBase extends _StringBase {

- factory _TwoByteStringBase._uninstantiable() {

+class _TwoByteString extends _StringBase implements String {

+ factory _TwoByteString._uninstantiable() {

throw new UnsupportedError(

- "_TwoByteStringBase can't be instaniated");

+ "_TwoByteString can only be allocated by the VM");

}

- // Works for both code points and code units since all spaces are in the BMP.

+ // Checks for one-byte whitespaces only.

+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

+ // whitespaces. Add checking for multi-byte whitespace codepoints.

bool _isWhitespace(int codePoint) {

return

- (codePoint == 32) || // Space.

- (codePoint == 0xa0) || // No-break space.

- ((9 <= codePoint) && (codePoint <= 13)) || // CR, LF, TAB, etc.

- (codePoint >= 0x1680 && // Optimization.

- (codePoint == 0x1680 || // Ogham space mark.

- codePoint == 0x180e || // Mongolian vowel separator.

- (codePoint >= 0x2000 && codePoint <= 0x200a) || // Wide/narrow spaces.

- codePoint == 0x2028 || // Line separator.

- codePoint == 0x2029 || // Paragraph separator.

- codePoint == 0x202f || // Narrow no-break space.

- codePoint == 0x205f || // Medium mathematical space.

- codePoint == 0x3000 || // Ideographic space.

- codePoint == 0xfeff)); // BOM code.

- }

- int charCodeAt(int index) {

- const int LEAD_SURROGATE_BASE = 0xd800;

- const int LEAD_SURROGATE_END = 0xdbff;

- const int TRAIL_SURROGATE_BASE = 0xdc00;

- const int TRAIL_SURROGATE_END = 0xdfff;

- const int MASK = 0x3ff;

- int code = codeUnitAt(index);

- if (code < LEAD_SURROGATE_BASE || code > LEAD_SURROGATE_END) return code;

- if (index + 1 >= length) return code;

- int trail = codeUnitAt(index + 1);

- if (trail < TRAIL_SURROGATE_BASE || trail > TRAIL_SURROGATE_END) {

- return code;

- }

- return String.SUPPLEMENTARY_CODE_POINT_BASE +

- ((code & MASK) << 10) + (trail & MASK);

- }

- // TODO(erikcorry): Fix this to use the new code point iterator when it is

- // available.

- List<int> get charCodes {

- int len = this.length;

- final result = new List<int>(len);

- bool supplementaryCharacterSeen = false;

- int i, j;

- for (i = j = 0; i < len; i++, j++) {

- int c = this.charCodeAt(i);

- // Check for supplementary plane character encoded as a UTF-16 surrogate

- // pair.

- if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) {

- i++;

- supplementaryCharacterSeen = true;

- }

- result[j] = c;

- }

- if (!supplementaryCharacterSeen) return result;

- // If we saw some non-basic plane characters, then we have to return a

- // slightly smaller array than expected (we can't trim the original one

- // because it is non-extendable). This rarely happens so this is preferable

- // to having a separate pass over the string to count the code points.

- return result.getRange(0, j);

+ (codePoint == 32) || // Space.

+ ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

}

-class _TwoByteString extends _TwoByteStringBase implements String {

- factory _TwoByteString._uninstantiable() {

+class _FourByteString extends _StringBase implements String {

+ factory _FourByteString._uninstantiable() {

throw new UnsupportedError(

- "_TwoByteString can only be allocated by the VM");

+ "_FourByteString can only be allocated by the VM");

+ }

+ // Checks for one-byte whitespaces only.

+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

+ // whitespaces. Add checking for multi-byte whitespace codepoints.

+ bool _isWhitespace(int codePoint) {

+ return

+ (codePoint == 32) || // Space.

+ ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

}

@@ -472,24 +412,48 @@ class _ExternalOneByteString extends _StringBase implements String {

"_ExternalOneByteString can only be allocated by the VM");

}

+ // Checks for one-byte whitespaces only.

+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

+ // whitespaces for one byte strings.

bool _isWhitespace(int codePoint) {

return

- (codePoint == 32) || // Space.

- (codePoint == 0xa0) || // No-break space.

+ (codePoint == 32) || // Space.

((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

}

- int charCodeAt(int index) => codeUnitAt(index);

- List<int> get charCodes => codeUnits;

}

-class _ExternalTwoByteString extends _TwoByteStringBase implements String {

+class _ExternalTwoByteString extends _StringBase implements String {

factory _ExternalTwoByteString._uninstantiable() {

throw new UnsupportedError(

"_ExternalTwoByteString can only be allocated by the VM");

}

+ // Checks for one-byte whitespaces only.

+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

+ // whitespaces. Add checking for multi-byte whitespace codepoints.

+ bool _isWhitespace(int codePoint) {

+ return

+ (codePoint == 32) || // Space.

+ ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

+ }

+class _ExternalFourByteString extends _StringBase implements String {

+ factory _ExternalFourByteString._uninstantiable() {

+ throw new UnsupportedError(

+ "ExternalFourByteString can only be allocated by the VM");

+ }

+ // Checks for one-byte whitespaces only.

+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

+ // whitespaces. Add checking for multi-byte whitespace codepoints.

+ bool _isWhitespace(int codePoint) {

+ return

+ (codePoint == 32) || // Space.

+ ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

+ }

}

No previous file with comments | Previous file: runtime/lib/string.cc ('k') | Next file: runtime/lib/string_patch.dart ('j') | No next file with comments