Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(300)

Side by Side Diff: sdk/lib/core/string.dart

Issue 11411092: Revert "Add some support for the code-point code-unit distinction." (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sdk/lib/_internal/compiler/implementation/util/util.dart ('k') | sdk/lib/io/string_stream.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 /** 5 /**
6 * The String class represents character strings. Strings are 6 * The String class represents character strings. Strings are
7 * immutable. A string is represented by a list of 16-bit UTF-16 7 * immutable. A string is represented by a list of 32-bit Unicode
8 * code units accessible through the [codeUnitAt] or the [codeUnits] 8 * scalar character codes accessible through the [charCodeAt] or the
9 * methods. The corresponding Unicode code points are available with 9 * [charCodes] method.
10 * [charCodeAt] or the [charCodes] method.
11 */ 10 */
12 abstract class String implements Comparable, Pattern, Sequence<String> { 11 abstract class String implements Comparable, Pattern, Sequence<String> {
13 // Unicode does not allow for code points above this limit. 12 /**
14 static const int MAX_CODE_POINT = 0x10ffff; 13 * Allocates a new String for the specified [charCodes].
15 // A Dart string is represented by UTF-16 code units which must be <= 0xffff. 14 */
16 static const int MAX_CODE_UNIT = 0xffff; 15 external factory String.fromCharCodes(List<int> charCodes);
17 // Unicode does not allow for code points in this range.
18 static const int UNICODE_RESERVED_AREA_START = 0xd800;
19 static const int UNICODE_RESERVED_AREA_END = 0xdfff;
20 // Unicode code points above this limit are coded as two code units in Dart's
21 // UTF-16 string.
22 static const int SUPPLEMENTARY_CODE_POINT_BASE = 0x10000;
23 16
24 /** 17 /**
25 * Allocates a new String for the specified 21 bit Unicode [codePoints]. 18 * Gets the character (as [String]) at the given [index].
26 * Throws an ArgumentError if any of the codePoints are not ints between 0 and
27 * MAX_CODE_POINT. Also throws an ArgumentError if any of the code points
28 * are in the area reserved for UTF-16 surrogate pairs.
29 */
30 factory String.fromCharCodes(List<int> charCodes) {
31 int pairs = 0;
32 // There is some duplication of constants here relative to the ones in
33 // lib/utf/utf16.dart because we don't want core to depend on the utf
34 // library.
35 const int MASK = 0x3ff;
36 const int LEAD_SURROGATE_BASE = UNICODE_RESERVED_AREA_START;
37 const int TRAIL_SURROGATE_BASE = 0xdc00;
38 for (var code in charCodes) {
39 if (code is !int || code < 0) throw new ArgumentError(charCodes);
40 if (code >= UNICODE_RESERVED_AREA_START) {
41 if (code > MAX_CODE_UNIT) {
42 pairs++;
43 }
44 if (code <= UNICODE_RESERVED_AREA_END || code > MAX_CODE_POINT) {
45 // No surrogates or out-of-range code points allowed in the input.
46 throw new ArgumentError(charCodes);
47 }
48 }
49 }
50 // Fast case - there are no surrogate pairs.
51 if (pairs == 0) return new String.fromCodeUnits(charCodes);
52 var codeUnits = new List<int>(pairs + charCodes.length);
53 int j = 0;
54 for (int code in charCodes) {
55 if (code >= SUPPLEMENTARY_CODE_POINT_BASE) {
56 codeUnits[j++] = LEAD_SURROGATE_BASE +
57 (((code - SUPPLEMENTARY_CODE_POINT_BASE) >> 10) & MASK);
58 codeUnits[j++] = TRAIL_SURROGATE_BASE + (code & MASK);
59 } else {
60 codeUnits[j++] = code;
61 }
62 }
63 return new String.fromCodeUnits(codeUnits);
64 }
65
66 /**
67 * Allocates a new String for the specified 16 bit UTF-16 [codeUnits].
68 */
69 external factory String.fromCodeUnits(List<int> codeUnits);
70
71 /**
72 * Gets the Unicode character (as [String]) at the given [index]. This
73 * routine can return a single combining character (accent) that would
74 * normally be displayed together with the character it is modifying.
75 * If the index corresponds to a surrogate code unit then a one-code-unit
76 * string is returned containing that unpaired surrogate code unit.
77 */ 19 */
78 String operator [](int index); 20 String operator [](int index);
79 21
80 /** 22 /**
81 * Gets the 21 bit Unicode code point at the given [index]. If the code units 23 * Gets the scalar character code at the given [index].
82 * at index and index + 1 form a valid surrogate pair then this function
83 * returns the non-basic plane code point that they represent. If the code
84 * unit at index is a trailing surrogate or a leading surrogate that is not
85 * followed by a trailing surrogate then the raw code unit is returned.
86 */ 24 */
87 int charCodeAt(int index); 25 int charCodeAt(int index);
88 26
89 /** 27 /**
90 * Gets the 16 bit UTF-16 code unit at the given index. 28 * The length of the string.
91 */
92 int codeUnitAt(int index);
93
94
95 /**
96 * The length of the string, measured in UTF-16 code units.
97 */ 29 */
98 int get length; 30 int get length;
99 31
100 /** 32 /**
101 * Returns whether the two strings are equal. This method compares 33 * Returns whether the two strings are equal. This method compares
102 * each individual UTF-16 code unit. No Unicode normalization is 34 * each individual scalar character code of the strings.
103 * performed (accent composition/decomposition).
104 */ 35 */
105 bool operator ==(String other); 36 bool operator ==(String other);
106 37
107 /** 38 /**
108 * Returns whether this string ends with [other]. 39 * Returns whether this string ends with [other].
109 */ 40 */
110 bool endsWith(String other); 41 bool endsWith(String other);
111 42
112 /** 43 /**
113 * Returns whether this string starts with [other]. 44 * Returns whether this string starts with [other].
(...skipping 24 matching lines...) Expand all
138 */ 69 */
139 String concat(String other); 70 String concat(String other);
140 71
141 /** 72 /**
142 * Returns a substring of this string in the given range. 73 * Returns a substring of this string in the given range.
143 * [startIndex] is inclusive and [endIndex] is exclusive. 74 * [startIndex] is inclusive and [endIndex] is exclusive.
144 */ 75 */
145 String substring(int startIndex, [int endIndex]); 76 String substring(int startIndex, [int endIndex]);
146 77
147 /** 78 /**
148 * Removes leading and trailing whitespace from a string. If the string 79 * Removes leading and trailing whitespace from a string. If the
149 * contains leading or trailing whitespace a new string with no leading and 80 * string contains leading or trailing whitespace a new string with
150 * no trailing whitespace is returned. Otherwise, the string itself is 81 * no leading and no trailing whitespace is returned. Otherwise, the
151 * returned. Whitespace is defined as every Unicode character in the Zs, Zl 82 * string itself is returned.
152 * and Zp categories (this includes no-break space), the spacing control
153 * characters from 9 to 13 (tab, lf, vtab, ff and cr), and 0xfeff the BOM
154 * character.
155 */ 83 */
156 String trim(); 84 String trim();
157 85
158 /** 86 /**
159 * Returns whether this string contains [other] starting 87 * Returns whether this string contains [other] starting
160 * at [startIndex] (inclusive). 88 * at [startIndex] (inclusive).
161 */ 89 */
162 bool contains(Pattern other, [int startIndex]); 90 bool contains(Pattern other, [int startIndex]);
163 91
164 /** 92 /**
165 * Returns a new string where the first occurrence of [from] in this string 93 * Returns a new string where the first occurrence of [from] in this string
166 * is replaced with [to]. 94 * is replaced with [to].
167 */ 95 */
168 String replaceFirst(Pattern from, String to); 96 String replaceFirst(Pattern from, String to);
169 97
170 /** 98 /**
171 * Returns a new string where all occurrences of [from] in this string 99 * Returns a new string where all occurrences of [from] in this string
172 * are replaced with [to]. 100 * are replaced with [to].
173 */ 101 */
174 String replaceAll(Pattern from, String to); 102 String replaceAll(Pattern from, String to);
175 103
176 /** 104 /**
177 * Splits the string around matches of [pattern]. Returns 105 * Splits the string around matches of [pattern]. Returns
178 * a list of substrings. 106 * a list of substrings.
179 */ 107 */
180 List<String> split(Pattern pattern); 108 List<String> split(Pattern pattern);
181 109
182 /** 110 /**
183 * Returns a list of the characters of this string. No string normalization 111 * Returns a list of the characters of this string.
184 * is performed so unprecomposed combining characters (accents) may be found
185 * in the list. Valid surrogate pairs are returned as one string.
186 */ 112 */
187 List<String> splitChars(); 113 List<String> splitChars();
188 114
189 /** 115 /**
190 * Returns a list of the 21 bit Unicode code points of this string. 116 * Returns a list of the scalar character codes of this string.
191 */ 117 */
192 List<int> get charCodes; 118 List<int> get charCodes;
193 119
194 /** 120 /**
195 * Returns a list of the 16 bit UTF-16 code units of this string.
196 */
197 List<int> get codeUnits;
198
199 /**
200 * If this string is not already all lower case, returns a new string 121 * If this string is not already all lower case, returns a new string
201 * where all characters are made lower case. Returns [:this:] otherwise. 122 * where all characters are made lower case. Returns [:this:] otherwise.
202 */ 123 */
203 String toLowerCase(); 124 String toLowerCase();
204 125
205 /** 126 /**
206 * If this string is not already all upper case, returns a new string 127 * If this string is not already all upper case, returns a new string
207 * where all characters are made upper case. Returns [:this:] otherwise. 128 * where all characters are made upper case. Returns [:this:] otherwise.
208 */ 129 */
209 String toUpperCase(); 130 String toUpperCase();
210 } 131 }
OLDNEW
« no previous file with comments | « sdk/lib/_internal/compiler/implementation/util/util.dart ('k') | sdk/lib/io/string_stream.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698