OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.core; | 5 part of dart.core; |
6 | 6 |
7 /** | 7 /** |
8 * The String class represents sequences of characters. Strings are | 8 * The String class represents sequences of characters. Strings are |
9 * immutable. A string is represented by a sequence of Unicode UTF-16 | 9 * immutable. A string is represented by a sequence of Unicode UTF-16 |
10 * code units accessible through the [codeUnitAt] or the | 10 * code units accessible through the [codeUnitAt] or the |
(...skipping 10 matching lines...) Expand all Loading... |
21 /** | 21 /** |
22 * Allocates a new String for the specified [charCodes]. | 22 * Allocates a new String for the specified [charCodes]. |
23 * | 23 * |
24 * The [charCodes] can be UTF-16 code units or runes. If a char-code value is | 24 * The [charCodes] can be UTF-16 code units or runes. If a char-code value is |
25 * 16-bit it is copied verbatim. If it is greater than 16 bits it is | 25 * 16-bit it is copied verbatim. If it is greater than 16 bits it is |
26 * decomposed into a surrogate pair. | 26 * decomposed into a surrogate pair. |
27 */ | 27 */ |
28 external factory String.fromCharCodes(Iterable<int> charCodes); | 28 external factory String.fromCharCodes(Iterable<int> charCodes); |
29 | 29 |
30 /** | 30 /** |
31 * *Deprecated*. Use [String.fromCharCode] instead. | |
32 */ | |
33 @deprecated | |
34 factory String.character(int charCode) => new String.fromCharCode(charCode); | |
35 | |
36 /** | |
37 * Allocates a new String for the specified [charCode]. | 31 * Allocates a new String for the specified [charCode]. |
38 * | 32 * |
39 * The new string contains a single code unit if the [charCode] can be | 33 * The new string contains a single code unit if the [charCode] can be |
40 * represented by a single UTF-16 code unit. Otherwise the [length] is 2 and | 34 * represented by a single UTF-16 code unit. Otherwise the [length] is 2 and |
41 * the code units form a surrogate pair. | 35 * the code units form a surrogate pair. |
42 * | 36 * |
43 * It is allowed (though generally discouraged) to create a String with only | 37 * It is allowed (though generally discouraged) to create a String with only |
44 * one half of a surrogate pair. | 38 * one half of a surrogate pair. |
45 */ | 39 */ |
46 factory String.fromCharCode(int charCode) { | 40 factory String.fromCharCode(int charCode) { |
(...skipping 18 matching lines...) Expand all Loading... |
65 * // thus invalid UTF-16 strings: | 59 * // thus invalid UTF-16 strings: |
66 * clef[0]; // => "\uDBFF" | 60 * clef[0]; // => "\uDBFF" |
67 * clef[1]; // => "\uDFFD" | 61 * clef[1]; // => "\uDFFD" |
68 * | 62 * |
69 * This method is equivalent to | 63 * This method is equivalent to |
70 * `new String.fromCharCode(this.codeUnitAt(index))`. | 64 * `new String.fromCharCode(this.codeUnitAt(index))`. |
71 */ | 65 */ |
72 String operator [](int index); | 66 String operator [](int index); |
73 | 67 |
74 /** | 68 /** |
75 * Gets the scalar character code at the given [index]. | |
76 * | |
77 * *This method is deprecated. Please use [codeUnitAt] instead.* | |
78 */ | |
79 @deprecated | |
80 int charCodeAt(int index); | |
81 | |
82 /** | |
83 * Returns the 16-bit UTF-16 code unit at the given [index]. | 69 * Returns the 16-bit UTF-16 code unit at the given [index]. |
84 */ | 70 */ |
85 int codeUnitAt(int index); | 71 int codeUnitAt(int index); |
86 | 72 |
87 /** | 73 /** |
88 * The length of the string. | 74 * The length of the string. |
89 * | 75 * |
90 * Returns the number of UTF-16 code units in this string. The number | 76 * Returns the number of UTF-16 code units in this string. The number |
91 * of [runes] might be less, if the string contains characters outside | 77 * of [runes] might be less, if the string contains characters outside |
92 * the basic multilingual plane (plane 0). | 78 * the basic multilingual plane (plane 0). |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
212 | 198 |
213 /** | 199 /** |
214 * Splits the string around matches of [pattern]. Returns | 200 * Splits the string around matches of [pattern]. Returns |
215 * a list of substrings. | 201 * a list of substrings. |
216 * | 202 * |
217 * Splitting with an empty string pattern (`""`) splits at UTF-16 code unit | 203 * Splitting with an empty string pattern (`""`) splits at UTF-16 code unit |
218 * boundaries and not at rune boundaries. The following two expressions | 204 * boundaries and not at rune boundaries. The following two expressions |
219 * are hence equivalent: | 205 * are hence equivalent: |
220 * | 206 * |
221 * string.split("") | 207 * string.split("") |
222 * string.codeUnits.map((unit) => new String.character(unit)) | 208 * string.codeUnits.map((unit) => new String.fromCharCode(unit)) |
223 * | 209 * |
224 * Unless it is guaranteed that the string is in the basic multilingual plane | 210 * Unless it is guaranteed that the string is in the basic multilingual plane |
225 * (meaning that each code unit represents a rune) it is often better to | 211 * (meaning that each code unit represents a rune) it is often better to |
226 * map the runes instead: | 212 * map the runes instead: |
227 * | 213 * |
228 * string.runes.map((rune) => new String.character(rune)) | 214 * string.runes.map((rune) => new String.fromCharCode(rune)) |
229 */ | 215 */ |
230 List<String> split(Pattern pattern); | 216 List<String> split(Pattern pattern); |
231 | 217 |
232 /** | 218 /** |
233 * Returns a list of the individual code-units converted to strings. | |
234 * | |
235 * *Deprecated* | |
236 * If you want to split on code-unit boundaries, use [split]. If you | |
237 * want to split on rune boundaries, use [runes] and map the result. | |
238 * | |
239 * Iterable<String> characters = | |
240 * string.runes.map((c) => new String.fromCharCode(c)); | |
241 */ | |
242 @deprecated | |
243 List<String> splitChars(); | |
244 | |
245 /** | |
246 * Splits the string on the [pattern], then converts each part and each match. | 219 * Splits the string on the [pattern], then converts each part and each match. |
247 * | 220 * |
248 * The pattern is used to split the string into parts and separating matches. | 221 * The pattern is used to split the string into parts and separating matches. |
249 * | 222 * |
250 * Each match is converted to a string by calling [onMatch]. If [onMatch] | 223 * Each match is converted to a string by calling [onMatch]. If [onMatch] |
251 * is omitted, the matched string is used. | 224 * is omitted, the matched string is used. |
252 * | 225 * |
253 * Each non-matched part is converted by a call to [onNonMatch]. If | 226 * Each non-matched part is converted by a call to [onNonMatch]. If |
254 * [onNonMatch] is omitted, the non-matching part is used. | 227 * [onNonMatch] is omitted, the non-matching part is used. |
255 * | 228 * |
256 * Then all the converted parts are combined into the resulting string. | 229 * Then all the converted parts are combined into the resulting string. |
257 */ | 230 */ |
258 String splitMapJoin(Pattern pattern, | 231 String splitMapJoin(Pattern pattern, |
259 {String onMatch(Match match), | 232 {String onMatch(Match match), |
260 String onNonMatch(String nonMatch)}); | 233 String onNonMatch(String nonMatch)}); |
261 | 234 |
262 /** | 235 /** |
263 * Returns a list of UTF-16 code units of this string. | 236 * Returns an unmodifiable list of the UTF-16 code units of this string. |
264 * | |
265 * *This getter is deprecated. Use [codeUnits] instead.* | |
266 */ | 237 */ |
267 List<int> get charCodes; | 238 List<int> get codeUnits; |
268 | |
269 /** | |
270 * Returns an iterable of the UTF-16 code units of this string. | |
271 */ | |
272 // TODO(floitsch): should it return a list? | |
273 // TODO(floitsch): make it a bidirectional iterator. | |
274 Iterable<int> get codeUnits; | |
275 | 239 |
276 /** | 240 /** |
277 * Returns an iterable of Unicode code-points of this string. | 241 * Returns an iterable of Unicode code-points of this string. |
278 * | 242 * |
279 * If the string contains surrogate pairs, they will be combined and returned | 243 * If the string contains surrogate pairs, they will be combined and returned |
280 * as one integer by this iterator. Unmatched surrogate halves are treated | 244 * as one integer by this iterator. Unmatched surrogate halves are treated |
281 * like valid 16-bit code-units. | 245 * like valid 16-bit code-units. |
282 */ | 246 */ |
283 Runes get runes; | 247 Runes get runes; |
284 | 248 |
(...skipping 19 matching lines...) Expand all Loading... |
304 final String string; | 268 final String string; |
305 Runes(this.string); | 269 Runes(this.string); |
306 | 270 |
307 RuneIterator get iterator => new RuneIterator(string); | 271 RuneIterator get iterator => new RuneIterator(string); |
308 | 272 |
309 int get last { | 273 int get last { |
310 if (string.length == 0) { | 274 if (string.length == 0) { |
311 throw new StateError("No elements."); | 275 throw new StateError("No elements."); |
312 } | 276 } |
313 int length = string.length; | 277 int length = string.length; |
314 int code = string.charCodeAt(length - 1); | 278 int code = string.codeUnitAt(length - 1); |
315 if (_isTrailSurrogate(code) && string.length > 1) { | 279 if (_isTrailSurrogate(code) && string.length > 1) { |
316 int previousCode = string.charCodeAt(length - 2); | 280 int previousCode = string.codeUnitAt(length - 2); |
317 if (_isLeadSurrogate(previousCode)) { | 281 if (_isLeadSurrogate(previousCode)) { |
318 return _combineSurrogatePair(previousCode, code); | 282 return _combineSurrogatePair(previousCode, code); |
319 } | 283 } |
320 } | 284 } |
321 return code; | 285 return code; |
322 } | 286 } |
323 | 287 |
324 } | 288 } |
325 | 289 |
326 // Is the code (a 16-bit unsigned integer) a UTF-16 lead surrogate. | 290 // Is the code (a 16-bit unsigned integer) a UTF-16 lead surrogate. |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
369 : string = string, _position = index, _nextPosition = index { | 333 : string = string, _position = index, _nextPosition = index { |
370 if (index < 0 || index > string.length) { | 334 if (index < 0 || index > string.length) { |
371 throw new RangeError.range(index, 0, string.length); | 335 throw new RangeError.range(index, 0, string.length); |
372 } | 336 } |
373 _checkSplitSurrogate(index); | 337 _checkSplitSurrogate(index); |
374 } | 338 } |
375 | 339 |
376 /** Throw an error if the index is in the middle of a surrogate pair. */ | 340 /** Throw an error if the index is in the middle of a surrogate pair. */ |
377 void _checkSplitSurrogate(int index) { | 341 void _checkSplitSurrogate(int index) { |
378 if (index > 0 && index < string.length && | 342 if (index > 0 && index < string.length && |
379 _isLeadSurrogate(string.charCodeAt(index - 1)) && | 343 _isLeadSurrogate(string.codeUnitAt(index - 1)) && |
380 _isTrailSurrogate(string.charCodeAt(index))) { | 344 _isTrailSurrogate(string.codeUnitAt(index))) { |
381 throw new ArgumentError("Index inside surrogate pair: $index"); | 345 throw new ArgumentError("Index inside surrogate pair: $index"); |
382 } | 346 } |
383 } | 347 } |
384 | 348 |
385 /** | 349 /** |
386 * Returns the starting position of the current rune in the string. | 350 * Returns the starting position of the current rune in the string. |
387 * | 351 * |
388 * Returns null if the [current] rune is null. | 352 * Returns null if the [current] rune is null. |
389 */ | 353 */ |
390 int get rawIndex => (_position != _nextPosition) ? _position : null; | 354 int get rawIndex => (_position != _nextPosition) ? _position : null; |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
448 if (_position + 1 == _nextPosition) return string[_position]; | 412 if (_position + 1 == _nextPosition) return string[_position]; |
449 return string.substring(_position, _nextPosition); | 413 return string.substring(_position, _nextPosition); |
450 } | 414 } |
451 | 415 |
452 bool moveNext() { | 416 bool moveNext() { |
453 _position = _nextPosition; | 417 _position = _nextPosition; |
454 if (_position == string.length) { | 418 if (_position == string.length) { |
455 _currentCodePoint = null; | 419 _currentCodePoint = null; |
456 return false; | 420 return false; |
457 } | 421 } |
458 int codeUnit = string.charCodeAt(_position); | 422 int codeUnit = string.codeUnitAt(_position); |
459 int nextPosition = _position + 1; | 423 int nextPosition = _position + 1; |
460 if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) { | 424 if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) { |
461 int nextCodeUnit = string.charCodeAt(nextPosition); | 425 int nextCodeUnit = string.codeUnitAt(nextPosition); |
462 if (_isTrailSurrogate(nextCodeUnit)) { | 426 if (_isTrailSurrogate(nextCodeUnit)) { |
463 _nextPosition = nextPosition + 1; | 427 _nextPosition = nextPosition + 1; |
464 _currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit); | 428 _currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit); |
465 return true; | 429 return true; |
466 } | 430 } |
467 } | 431 } |
468 _nextPosition = nextPosition; | 432 _nextPosition = nextPosition; |
469 _currentCodePoint = codeUnit; | 433 _currentCodePoint = codeUnit; |
470 return true; | 434 return true; |
471 } | 435 } |
472 | 436 |
473 bool movePrevious() { | 437 bool movePrevious() { |
474 _nextPosition = _position; | 438 _nextPosition = _position; |
475 if (_position == 0) { | 439 if (_position == 0) { |
476 _currentCodePoint = null; | 440 _currentCodePoint = null; |
477 return false; | 441 return false; |
478 } | 442 } |
479 int position = _position - 1; | 443 int position = _position - 1; |
480 int codeUnit = string.charCodeAt(position); | 444 int codeUnit = string.codeUnitAt(position); |
481 if (_isTrailSurrogate(codeUnit) && position > 0) { | 445 if (_isTrailSurrogate(codeUnit) && position > 0) { |
482 int prevCodeUnit = string.charCodeAt(position - 1); | 446 int prevCodeUnit = string.codeUnitAt(position - 1); |
483 if (_isLeadSurrogate(prevCodeUnit)) { | 447 if (_isLeadSurrogate(prevCodeUnit)) { |
484 _position = position - 1; | 448 _position = position - 1; |
485 _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); | 449 _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); |
486 return true; | 450 return true; |
487 } | 451 } |
488 } | 452 } |
489 _position = position; | 453 _position = position; |
490 _currentCodePoint = codeUnit; | 454 _currentCodePoint = codeUnit; |
491 return true; | 455 return true; |
492 } | 456 } |
493 } | 457 } |
494 | |
495 /** | |
496 * An [Iterable] of the UTF-16 code units of a [String] in index order. | |
497 */ | |
498 class CodeUnits extends ListIterable<int> { | |
499 /** The string that this is the code units of. */ | |
500 String string; | |
501 | |
502 CodeUnits(this.string); | |
503 | |
504 int get length => string.length; | |
505 int elementAt(int i) => string.codeUnitAt(i); | |
506 } | |
OLD | NEW |