Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(114)

Unified Diff: sdk/lib/core/string.dart

Issue 11368138: Add some support for the code-point code-unit distinction. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: sdk/lib/core/string.dart
diff --git a/sdk/lib/core/string.dart b/sdk/lib/core/string.dart
index ad4aefee7e1fca8b4e591dd6fed3f8de54c22e9d..8486ce8cf2de7ac21d3207b08fe1944758434b76 100644
--- a/sdk/lib/core/string.dart
+++ b/sdk/lib/core/string.dart
@@ -4,36 +4,55 @@
/**
* The String class represents character strings. Strings are
- * immutable. A string is represented by a list of 32-bit Unicode
- * scalar character codes accessible through the [charCodeAt] or the
- * [charCodes] method.
+ * immutable. A string is represented by a list of 16-bit UTF-16
+ * code units, accessible through the [codeUnitAt] method or the
+ * [codeUnits] getter. The corresponding Unicode code points are
+ * available through the [charCodeAt] method or the [charCodes] getter.
*/
interface String
extends Comparable, Pattern, Sequence<String>
default _StringImpl {
/**
- * Allocates a new String for the specified [charCodes].
+ * Allocates a new String for the specified 21-bit Unicode [codePoints].
*/
- String.fromCharCodes(List<int> charCodes);
+ String.fromCharCodes(List<int> codePoints);
/**
- * Gets the character (as [String]) at the given [index].
+ * Allocates a new String for the specified 16-bit UTF-16 [codeUnits].
+ */
+ String.fromCodeUnits(List<int> codeUnits);
+
+ /**
+ * Gets the Unicode character (as [String]) at the given [index]. This
+ * routine can return a single combining character (accent) that would
+ * normally be displayed together with the character it is modifying.
+ * If [index] refers to the first code unit of a UTF-16 surrogate pair,
floitsch 2012/11/08 15:28:21 Update comment.
erikcorry 2012/11/15 13:28:25 Done.
+ * the returned string contains the Unicode character encoded by
+ * that pair.
*/
String operator [](int index);
/**
- * Gets the scalar character code at the given [index].
+ * Gets the 21-bit Unicode code point at the given [index]. Surrogate
+ * pairs are handled as in [operator []].
floitsch 2012/11/08 15:28:21 Update comment.
erikcorry 2012/11/15 13:28:25 Done.
*/
int charCodeAt(int index);
/**
- * The length of the string.
+ * Gets the 16-bit UTF-16 code unit at the given [index].
floitsch 2012/11/08 15:28:21 Update comment (now same as [operator []]).
erikcorry 2012/11/15 13:28:25 Done.
erikcorry 2012/11/15 13:28:25 Done.
+ */
+ int codeUnitAt(int index);
+
+
+ /**
+ * The length of the string, measured in UTF-16 code units.
*/
int get length;
/**
* Returns whether the two strings are equal. This method compares
- * each individual scalar character codes of the strings.
+ * each individual UTF-16 code unit. No Unicode normalization
+ * (accent composition/decomposition) is performed.
*/
bool operator ==(String other);
@@ -110,16 +129,23 @@ interface String
List<String> split(Pattern pattern);
/**
- * Returns a list of the characters of this string.
+ * Returns a list of the characters of this string. No Unicode normalization
+ * is performed, so combining characters (accents) that have not been
+ * precomposed with their base character may appear in the list.
*/
List<String> splitChars();
/**
- * Returns a list of the scalar character codes of this string.
+ * Returns a list of the 21-bit Unicode code points of this string.
*/
List<int> get charCodes;
/**
+ * Returns a list of the 16-bit UTF-16 code units of this string.
+ */
+ List<int> get codeUnits;
+
+ /**
* If this string is not already all lower case, returns a new string
* where all characters are made lower case. Returns [:this:] otherwise.
*/
@@ -134,10 +160,14 @@ interface String
class _StringImpl {
/**
- * Factory implementation of String.fromCharCodes:
- * Allocates a new String for the specified [charCodes].
+ * Factory implementation of String.fromCharCodes.
+ */
+ external factory String.fromCharCodes(List<int> codePoints);
+
+ /**
+ * Factory implementation of String.fromCodeUnits.
*/
- external factory String.fromCharCodes(List<int> charCodes);
+ external factory String.fromCodeUnits(List<int> codeUnits);
/**
* Joins all the given strings to create a new string.

Powered by Google App Engine
This is Rietveld 408576698