sdk/lib/core/string.dart - Issue 11411092: Revert "Add some support for the code-point code-unit distinction."

Side by Side Diff: sdk/lib/core/string.dart

Issue 11411092: Revert "Add some support for the code-point code-unit distinction." (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 /**	5 /**

6 * The String class represents character strings. Strings are	6 * The String class represents character strings. Strings are

7 * immutable. A string is represented by a list of 16-bit UTF-16	7 * immutable. A string is represented by a list of 32-bit Unicode

8 * code units accessible through the [codeUnitAt] or the [codeUnits]	8 * scalar character codes accessible through the [charCodeAt] or the

9 * methods. The corresponding Unicode code points are available with	9 * [charCodes] method.

10 * [charCodeAt] or the [charCodes] method.

11 */	10 */

12 abstract class String implements Comparable, Pattern, Sequence<String> {	11 abstract class String implements Comparable, Pattern, Sequence<String> {

13 // Unicode does not allow for code points above this limit.	12 /**

14 static const int MAX_CODE_POINT = 0x10ffff;	13 * Allocates a new String for the specified [charCodes].

15 // A Dart string is represented by UTF-16 code units which must be <= 0xffff.	14 */

16 static const int MAX_CODE_UNIT = 0xffff;	15 external factory String.fromCharCodes(List<int> charCodes);

17 // Unicode does not allow for code points in this range.

18 static const int UNICODE_RESERVED_AREA_START = 0xd800;

19 static const int UNICODE_RESERVED_AREA_END = 0xdfff;

20 // Unicode code points above this limit are coded as two code units in Dart's

21 // UTF-16 string.

22 static const int SUPPLEMENTARY_CODE_POINT_BASE = 0x10000;

23	16

24 /**	17 /**

25 * Allocates a new String for the specified 21 bit Unicode [codePoints].	18 * Gets the character (as [String]) at the given [index].

26 * Throws an ArgumentError if any of the codePoints are not ints between 0 and

27 * MAX_CODE_POINT. Also throws an ArgumentError if any of the code points

28 * are in the area reserved for UTF-16 surrogate pairs.

29 */

30 factory String.fromCharCodes(List<int> charCodes) {

31 int pairs = 0;

32 // There is some duplication of constants here relative to the ones in

33 // lib/utf/utf16.dart because we don't want core to depend on the utf

34 // library.

35 const int MASK = 0x3ff;

36 const int LEAD_SURROGATE_BASE = UNICODE_RESERVED_AREA_START;

37 const int TRAIL_SURROGATE_BASE = 0xdc00;

38 for (var code in charCodes) {

39 if (code is !int || code < 0) throw new ArgumentError(charCodes);

40 if (code >= UNICODE_RESERVED_AREA_START) {

41 if (code > MAX_CODE_UNIT) {

42 pairs++;

43 }

44 if (code <= UNICODE_RESERVED_AREA_END || code > MAX_CODE_POINT) {

45 // No surrogates or out-of-range code points allowed in the input.

46 throw new ArgumentError(charCodes);

47 }

48 }

49 }

50 // Fast case - there are no surrogate pairs.

51 if (pairs == 0) return new String.fromCodeUnits(charCodes);

52 var codeUnits = new List<int>(pairs + charCodes.length);

53 int j = 0;

54 for (int code in charCodes) {

55 if (code >= SUPPLEMENTARY_CODE_POINT_BASE) {

56 codeUnits[j++] = LEAD_SURROGATE_BASE +

57 (((code - SUPPLEMENTARY_CODE_POINT_BASE) >> 10) & MASK);

58 codeUnits[j++] = TRAIL_SURROGATE_BASE + (code & MASK);

59 } else {

60 codeUnits[j++] = code;

61 }

62 }

63 return new String.fromCodeUnits(codeUnits);

64 }

65

66 /**

67 * Allocates a new String for the specified 16 bit UTF-16 [codeUnits].

68 */

69 external factory String.fromCodeUnits(List<int> codeUnits);

70

71 /**

72 * Gets the Unicode character (as [String]) at the given [index]. This

73 * routine can return a single combining character (accent) that would

74 * normally be displayed together with the character it is modifying.

75 * If the index corresponds to a surrogate code unit then a one-code-unit

76 * string is returned containing that unpaired surrogate code unit.

77 */	19 */

78 String operator [](int index);	20 String operator [](int index);

79	21

80 /**	22 /**

81 * Gets the 21 bit Unicode code point at the given [index]. If the code units	23 * Gets the scalar character code at the given [index].

82 * at index and index + 1 form a valid surrogate pair then this function

83 * returns the non-basic plane code point that they represent. If the code

84 * unit at index is a trailing surrogate or a leading surrogate that is not

85 * followed by a trailing surrogate then the raw code unit is returned.

86 */	24 */

87 int charCodeAt(int index);	25 int charCodeAt(int index);

88	26

89 /**	27 /**

90 * Gets the 16 bit UTF-16 code unit at the given index.	28 * The length of the string.

91 */

92 int codeUnitAt(int index);

93

94

95 /**

96 * The length of the string, measured in UTF-16 code units.

97 */	29 */

98 int get length;	30 int get length;

99	31

100 /**	32 /**

101 * Returns whether the two strings are equal. This method compares	33 * Returns whether the two strings are equal. This method compares

102 * each individual UTF-16 code unit. No Unicode normalization is	34 * each individual scalar character code of the strings.

103 * performed (accent composition/decomposition).

104 */	35 */

105 bool operator ==(String other);	36 bool operator ==(String other);

106	37

107 /**	38 /**

108 * Returns whether this string ends with [other].	39 * Returns whether this string ends with [other].

109 */	40 */

110 bool endsWith(String other);	41 bool endsWith(String other);

111	42

112 /**	43 /**

113 * Returns whether this string starts with [other].	44 * Returns whether this string starts with [other].

(...skipping 24 matching lines...) Expand all Loading...
138 */	69 */

139 String concat(String other);	70 String concat(String other);

140	71

141 /**	72 /**

142 * Returns a substring of this string in the given range.	73 * Returns a substring of this string in the given range.

143 * [startIndex] is inclusive and [endIndex] is exclusive.	74 * [startIndex] is inclusive and [endIndex] is exclusive.

144 */	75 */

145 String substring(int startIndex, [int endIndex]);	76 String substring(int startIndex, [int endIndex]);

146	77

147 /**	78 /**

148 * Removes leading and trailing whitespace from a string. If the string	79 * Removes leading and trailing whitespace from a string. If the

149 * contains leading or trailing whitespace a new string with no leading and	80 * string contains leading or trailing whitespace a new string with

150 * no trailing whitespace is returned. Otherwise, the string itself is	81 * no leading and no trailing whitespace is returned. Otherwise, the

151 * returned. Whitespace is defined as every Unicode character in the Zs, Zl	82 * string itself is returned.

152 * and Zp categories (this includes no-break space), the spacing control

153 * characters from 9 to 13 (tab, lf, vtab, ff and cr), and 0xfeff the BOM

154 * character.

155 */	83 */

156 String trim();	84 String trim();

157	85

158 /**	86 /**

159 * Returns whether this string contains [other] starting	87 * Returns whether this string contains [other] starting

160 * at [startIndex] (inclusive).	88 * at [startIndex] (inclusive).

161 */	89 */

162 bool contains(Pattern other, [int startIndex]);	90 bool contains(Pattern other, [int startIndex]);

163	91

164 /**	92 /**

165 * Returns a new string where the first occurrence of [from] in this string	93 * Returns a new string where the first occurrence of [from] in this string

166 * is replaced with [to].	94 * is replaced with [to].

167 */	95 */

168 String replaceFirst(Pattern from, String to);	96 String replaceFirst(Pattern from, String to);

169	97

170 /**	98 /**

171 * Returns a new string where all occurrences of [from] in this string	99 * Returns a new string where all occurrences of [from] in this string

172 * are replaced with [to].	100 * are replaced with [to].

173 */	101 */

174 String replaceAll(Pattern from, String to);	102 String replaceAll(Pattern from, String to);

175	103

176 /**	104 /**

177 * Splits the string around matches of [pattern]. Returns	105 * Splits the string around matches of [pattern]. Returns

178 * a list of substrings.	106 * a list of substrings.

179 */	107 */

180 List<String> split(Pattern pattern);	108 List<String> split(Pattern pattern);

181	109

182 /**	110 /**

183 * Returns a list of the characters of this string. No string normalization	111 * Returns a list of the characters of this string.

184 * is performed so unprecomposed combining characters (accents) may be found

185 * in the list. Valid surrogate pairs are returned as one string.

186 */	112 */

187 List<String> splitChars();	113 List<String> splitChars();

188	114

189 /**	115 /**

190 * Returns a list of the 21 bit Unicode code points of this string.	116 * Returns a list of the scalar character codes of this string.

191 */	117 */

192 List<int> get charCodes;	118 List<int> get charCodes;

193	119

194 /**	120 /**

195 * Returns a list of the 16 bit UTF-16 code units of this string.

196 */

197 List<int> get codeUnits;

198

199 /**

200 * If this string is not already all lower case, returns a new string	121 * If this string is not already all lower case, returns a new string

201 * where all characters are made lower case. Returns [:this:] otherwise.	122 * where all characters are made lower case. Returns [:this:] otherwise.

202 */	123 */

203 String toLowerCase();	124 String toLowerCase();

204	125

205 /**	126 /**

206 * If this string is not already all upper case, returns a new string	127 * If this string is not already all upper case, returns a new string

207 * where all characters are made upper case. Returns [:this:] otherwise.	128 * where all characters are made upper case. Returns [:this:] otherwise.

208 */	129 */

209 String toUpperCase();	130 String toUpperCase();

210 }	131 }

OLD	NEW

« no previous file with comments | « sdk/lib/_internal/compiler/implementation/util/util.dart ('k') | sdk/lib/io/string_stream.dart » ('j') | no next file with comments »