Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(101)

Unified Diff: runtime/lib/string_base.dart

Issue 11411092: Revert "Add some support for the code-point code-unit distinction." (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « runtime/lib/string.cc ('k') | runtime/lib/string_patch.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/lib/string_base.dart
diff --git a/runtime/lib/string_base.dart b/runtime/lib/string_base.dart
index 8aa16df99c5a24b8e54e7e9bf529735fdc214896..22e17b9662c0098a0a15de1770e430b36c459db3 100644
--- a/runtime/lib/string_base.dart
+++ b/runtime/lib/string_base.dart
@@ -16,29 +16,29 @@ class _StringBase {
int get hashCode native "String_getHashCode";
/**
- * Create the most efficient string representation for the specified UTF-16
- * [codeUnits].
+ * Create the most efficient string representation for specified
+ * [codePoints].
*/
- static String createFromUtf16(List<int> codeUnits) {
+ static String createFromCharCodes(List<int> charCodes) {
_ObjectArray objectArray;
- if (codeUnits is _ObjectArray) {
- objectArray = codeUnits;
+ if (charCodes is _ObjectArray) {
+ objectArray = charCodes;
} else {
- int len = codeUnits.length;
+ int len = charCodes.length;
objectArray = new _ObjectArray(len);
for (int i = 0; i < len; i++) {
- objectArray[i] = codeUnits[i];
+ objectArray[i] = charCodes[i];
}
}
- return _createFromUtf16(objectArray);
+ return _createFromCodePoints(objectArray);
}
- static String _createFromUtf16(List<int> codeUnits)
- native "StringBase_createFromUtf16";
+ static String _createFromCodePoints(List<int> codePoints)
+ native "StringBase_createFromCodePoints";
String operator [](int index) native "String_charAt";
- int codeUnitAt(int index) native "String_codeUnitAt";
+ int charCodeAt(int index) native "String_charCodeAt";
int get length native "String_getLength";
@@ -69,12 +69,12 @@ class _StringBase {
int otherLength = other.length;
int len = (thisLength < otherLength) ? thisLength : otherLength;
for (int i = 0; i < len; i++) {
- int thisCodeUnit = this.codeUnitAt(i);
- int otherCodeUnit = other.codeUnitAt(i);
- if (thisCodeUnit < otherCodeUnit) {
+ int thisCodePoint = this.charCodeAt(i);
+ int otherCodePoint = other.charCodeAt(i);
+ if (thisCodePoint < otherCodePoint) {
return -1;
}
- if (thisCodeUnit > otherCodeUnit) {
+ if (thisCodePoint > otherCodePoint) {
return 1;
}
}
@@ -93,7 +93,7 @@ class _StringBase {
return false;
}
for (int i = 0; i < len; i++) {
- if (this.codeUnitAt(i + start) != other.codeUnitAt(i)) {
+ if (this.charCodeAt(i + start) != other.charCodeAt(i)) {
return false;
}
}
@@ -162,9 +162,7 @@ class _StringBase {
final int len = this.length;
int first = 0;
for (; first < len; first++) {
- // There are no whitespace characters that are outside the BMP so we
- // can use code units here for efficiency.
- if (!_isWhitespace(this.codeUnitAt(first))) {
+ if (!_isWhitespace(this.charCodeAt(first))) {
break;
}
}
@@ -174,7 +172,7 @@ class _StringBase {
}
int last = len - 1;
for (; last >= first; last--) {
- if (!_isWhitespace(this.codeUnitAt(last))) {
+ if (!_isWhitespace(this.charCodeAt(last))) {
break;
}
}
@@ -295,35 +293,20 @@ class _StringBase {
return result;
}
- // TODO(erikcorry): Fix this to use the new code point iterator when it is
- // available.
List<String> splitChars() {
int len = this.length;
final result = new List<String>(len);
- bool supplementaryCharacterSeen = false;
- int i, j;
- for (i = j = 0; i < len; i++, j++) {
- int c = charCodeAt(i);
- // Check for non-basic plane character encoded as a UTF-16 surrogate pair.
- if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) {
- i++;
- supplementaryCharacterSeen = true;
- }
- result[j] = new String.fromCharCodes([c]);
+ for (int i = 0; i < len; i++) {
+ result[i] = this[i];
}
- if (!supplementaryCharacterSeen) return result;
- // If we saw some non-basic plane characters, then we have to return a
- // slightly smaller array than expected (we can't trim the original one
- // because it is non-extendable). This rarely happens so this is preferable
- // to having a separate pass over the string to count the code points.
- return result.getRange(0, j);
+ return result;
}
- List<int> get codeUnits {
+ List<int> get charCodes {
int len = this.length;
final result = new List<int>(len);
for (int i = 0; i < len; i++) {
- result[i] = this.codeUnitAt(i);
+ result[i] = this.charCodeAt(i);
}
return result;
}
@@ -377,91 +360,48 @@ class _OneByteString extends _StringBase implements String {
"_OneByteString can only be allocated by the VM");
}
+ // Checks for one-byte whitespaces only.
+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
+ // whitespaces for one byte strings.
bool _isWhitespace(int codePoint) {
return
- (codePoint == 32) || // Space.
- (codePoint == 0xa0) || // No-break space.
+ (codePoint == 32) || // Space.
((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
}
- int charCodeAt(int index) => codeUnitAt(index);
-
- List<int> get charCodes => codeUnits;
}
-class _TwoByteStringBase extends _StringBase {
- factory _TwoByteStringBase._uninstantiable() {
+class _TwoByteString extends _StringBase implements String {
+ factory _TwoByteString._uninstantiable() {
throw new UnsupportedError(
- "_TwoByteStringBase can't be instaniated");
+ "_TwoByteString can only be allocated by the VM");
}
- // Works for both code points and code units since all spaces are in the BMP.
+ // Checks for one-byte whitespaces only.
+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
+ // whitespaces. Add checking for multi-byte whitespace codepoints.
bool _isWhitespace(int codePoint) {
return
- (codePoint == 32) || // Space.
- (codePoint == 0xa0) || // No-break space.
- ((9 <= codePoint) && (codePoint <= 13)) || // CR, LF, TAB, etc.
- (codePoint >= 0x1680 && // Optimization.
- (codePoint == 0x1680 || // Ogham space mark.
- codePoint == 0x180e || // Mongolian vowel separator.
- (codePoint >= 0x2000 && codePoint <= 0x200a) || // Wide/narrow spaces.
- codePoint == 0x2028 || // Line separator.
- codePoint == 0x2029 || // Paragraph separator.
- codePoint == 0x202f || // Narrow no-break space.
- codePoint == 0x205f || // Medium mathematical space.
- codePoint == 0x3000 || // Ideographic space.
- codePoint == 0xfeff)); // BOM code.
- }
-
- int charCodeAt(int index) {
- const int LEAD_SURROGATE_BASE = 0xd800;
- const int LEAD_SURROGATE_END = 0xdbff;
- const int TRAIL_SURROGATE_BASE = 0xdc00;
- const int TRAIL_SURROGATE_END = 0xdfff;
- const int MASK = 0x3ff;
- int code = codeUnitAt(index);
- if (code < LEAD_SURROGATE_BASE || code > LEAD_SURROGATE_END) return code;
- if (index + 1 >= length) return code;
- int trail = codeUnitAt(index + 1);
- if (trail < TRAIL_SURROGATE_BASE || trail > TRAIL_SURROGATE_END) {
- return code;
- }
- return String.SUPPLEMENTARY_CODE_POINT_BASE +
- ((code & MASK) << 10) + (trail & MASK);
- }
-
- // TODO(erikcorry): Fix this to use the new code point iterator when it is
- // available.
- List<int> get charCodes {
- int len = this.length;
- final result = new List<int>(len);
- bool supplementaryCharacterSeen = false;
- int i, j;
- for (i = j = 0; i < len; i++, j++) {
- int c = this.charCodeAt(i);
- // Check for supplementary plane character encoded as a UTF-16 surrogate
- // pair.
- if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) {
- i++;
- supplementaryCharacterSeen = true;
- }
- result[j] = c;
- }
- if (!supplementaryCharacterSeen) return result;
- // If we saw some non-basic plane characters, then we have to return a
- // slightly smaller array than expected (we can't trim the original one
- // because it is non-extendable). This rarely happens so this is preferable
- // to having a separate pass over the string to count the code points.
- return result.getRange(0, j);
+ (codePoint == 32) || // Space.
+ ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
}
}
-class _TwoByteString extends _TwoByteStringBase implements String {
- factory _TwoByteString._uninstantiable() {
+class _FourByteString extends _StringBase implements String {
+ factory _FourByteString._uninstantiable() {
throw new UnsupportedError(
- "_TwoByteString can only be allocated by the VM");
+ "_FourByteString can only be allocated by the VM");
+ }
+
+ // Checks for one-byte whitespaces only.
+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
+ // whitespaces. Add checking for multi-byte whitespace codepoints.
+ bool _isWhitespace(int codePoint) {
+ return
+ (codePoint == 32) || // Space.
+ ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
}
}
@@ -472,24 +412,48 @@ class _ExternalOneByteString extends _StringBase implements String {
"_ExternalOneByteString can only be allocated by the VM");
}
+ // Checks for one-byte whitespaces only.
+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
+ // whitespaces for one byte strings.
bool _isWhitespace(int codePoint) {
return
- (codePoint == 32) || // Space.
- (codePoint == 0xa0) || // No-break space.
+ (codePoint == 32) || // Space.
((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
}
-
- int charCodeAt(int index) => codeUnitAt(index);
-
- List<int> get charCodes => codeUnits;
}
-class _ExternalTwoByteString extends _TwoByteStringBase implements String {
+class _ExternalTwoByteString extends _StringBase implements String {
factory _ExternalTwoByteString._uninstantiable() {
throw new UnsupportedError(
"_ExternalTwoByteString can only be allocated by the VM");
}
+
+ // Checks for one-byte whitespaces only.
+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
+ // whitespaces. Add checking for multi-byte whitespace codepoints.
+ bool _isWhitespace(int codePoint) {
+ return
+ (codePoint == 32) || // Space.
+ ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
+ }
+}
+
+
+class _ExternalFourByteString extends _StringBase implements String {
+ factory _ExternalFourByteString._uninstantiable() {
+ throw new UnsupportedError(
+ "ExternalFourByteString can only be allocated by the VM");
+ }
+
+ // Checks for one-byte whitespaces only.
+ // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
+ // whitespaces. Add checking for multi-byte whitespace codepoints.
+ bool _isWhitespace(int codePoint) {
+ return
+ (codePoint == 32) || // Space.
+ ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
+ }
}
« no previous file with comments | « runtime/lib/string.cc ('k') | runtime/lib/string_patch.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698