| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of dart.core; | 5 part of dart.core; |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * The String class represents sequences of characters. Strings are | 8 * The String class represents sequences of characters. Strings are |
| 9 * immutable. A string is represented by a sequence of Unicode UTF-16 | 9 * immutable. A string is represented by a sequence of Unicode UTF-16 |
| 10 * code units accessible through the [codeUnitAt] or the | 10 * code units accessible through the [codeUnitAt] or the |
| (...skipping 10 matching lines...) Expand all Loading... |
| 21 /** | 21 /** |
| 22 * Allocates a new String for the specified [charCodes]. | 22 * Allocates a new String for the specified [charCodes]. |
| 23 * | 23 * |
| 24 * The [charCodes] can be UTF-16 code units or runes. If a char-code value is | 24 * The [charCodes] can be UTF-16 code units or runes. If a char-code value is |
| 25 * 16-bit it is copied verbatim. If it is greater than 16 bits it is | 25 * 16-bit it is copied verbatim. If it is greater than 16 bits it is |
| 26 * decomposed into a surrogate pair. | 26 * decomposed into a surrogate pair. |
| 27 */ | 27 */ |
| 28 external factory String.fromCharCodes(Iterable<int> charCodes); | 28 external factory String.fromCharCodes(Iterable<int> charCodes); |
| 29 | 29 |
| 30 /** | 30 /** |
| 31 * *Deprecated*. Use [String.fromCharCode] instead. | |
| 32 */ | |
| 33 @deprecated | |
| 34 factory String.character(int charCode) => new String.fromCharCode(charCode); | |
| 35 | |
| 36 /** | |
| 37 * Allocates a new String for the specified [charCode]. | 31 * Allocates a new String for the specified [charCode]. |
| 38 * | 32 * |
| 39 * The new string contains a single code unit if the [charCode] can be | 33 * The new string contains a single code unit if the [charCode] can be |
| 40 * represented by a single UTF-16 code unit. Otherwise the [length] is 2 and | 34 * represented by a single UTF-16 code unit. Otherwise the [length] is 2 and |
| 41 * the code units form a surrogate pair. | 35 * the code units form a surrogate pair. |
| 42 * | 36 * |
| 43 * It is allowed (though generally discouraged) to create a String with only | 37 * It is allowed (though generally discouraged) to create a String with only |
| 44 * one half of a surrogate pair. | 38 * one half of a surrogate pair. |
| 45 */ | 39 */ |
| 46 factory String.fromCharCode(int charCode) { | 40 factory String.fromCharCode(int charCode) { |
| (...skipping 18 matching lines...) Expand all Loading... |
| 65 * // thus invalid UTF-16 strings: | 59 * // thus invalid UTF-16 strings: |
| 66 * clef[0]; // => "\uDBFF" | 60 * clef[0]; // => "\uDBFF" |
| 67 * clef[1]; // => "\uDFFD" | 61 * clef[1]; // => "\uDFFD" |
| 68 * | 62 * |
| 69 * This method is equivalent to | 63 * This method is equivalent to |
| 70 * `new String.fromCharCode(this.codeUnitAt(index))`. | 64 * `new String.fromCharCode(this.codeUnitAt(index))`. |
| 71 */ | 65 */ |
| 72 String operator [](int index); | 66 String operator [](int index); |
| 73 | 67 |
| 74 /** | 68 /** |
| 75 * Gets the scalar character code at the given [index]. | |
| 76 * | |
| 77 * *This method is deprecated. Please use [codeUnitAt] instead.* | |
| 78 */ | |
| 79 @deprecated | |
| 80 int charCodeAt(int index); | |
| 81 | |
| 82 /** | |
| 83 * Returns the 16-bit UTF-16 code unit at the given [index]. | 69 * Returns the 16-bit UTF-16 code unit at the given [index]. |
| 84 */ | 70 */ |
| 85 int codeUnitAt(int index); | 71 int codeUnitAt(int index); |
| 86 | 72 |
| 87 /** | 73 /** |
| 88 * The length of the string. | 74 * The length of the string. |
| 89 * | 75 * |
| 90 * Returns the number of UTF-16 code units in this string. The number | 76 * Returns the number of UTF-16 code units in this string. The number |
| 91 * of [runes] might be less, if the string contains characters outside | 77 * of [runes] might be less, if the string contains characters outside |
| 92 * the basic multilingual plane (plane 0). | 78 * the basic multilingual plane (plane 0). |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 212 | 198 |
| 213 /** | 199 /** |
| 214 * Splits the string around matches of [pattern]. Returns | 200 * Splits the string around matches of [pattern]. Returns |
| 215 * a list of substrings. | 201 * a list of substrings. |
| 216 * | 202 * |
| 217 * Splitting with an empty string pattern (`""`) splits at UTF-16 code unit | 203 * Splitting with an empty string pattern (`""`) splits at UTF-16 code unit |
| 218 * boundaries and not at rune boundaries. The following two expressions | 204 * boundaries and not at rune boundaries. The following two expressions |
| 219 * are hence equivalent: | 205 * are hence equivalent: |
| 220 * | 206 * |
| 221 * string.split("") | 207 * string.split("") |
| 222 * string.codeUnits.map((unit) => new String.character(unit)) | 208 * string.codeUnits.map((unit) => new String.fromCharCode(unit)) |
| 223 * | 209 * |
| 224 * Unless it is guaranteed that the string is in the basic multilingual plane | 210 * Unless it is guaranteed that the string is in the basic multilingual plane |
| 225 * (meaning that each code unit represents a rune) it is often better to | 211 * (meaning that each code unit represents a rune) it is often better to |
| 226 * map the runes instead: | 212 * map the runes instead: |
| 227 * | 213 * |
| 228 * string.runes.map((rune) => new String.character(rune)) | 214 * string.runes.map((rune) => new String.fromCharCode(rune)) |
| 229 */ | 215 */ |
| 230 List<String> split(Pattern pattern); | 216 List<String> split(Pattern pattern); |
| 231 | 217 |
| 232 /** | 218 /** |
| 233 * Returns a list of the individual code-units converted to strings. | |
| 234 * | |
| 235 * *Deprecated* | |
| 236 * If you want to split on code-unit boundaries, use [split]. If you | |
| 237 * want to split on rune boundaries, use [runes] and map the result. | |
| 238 * | |
| 239 * Iterable<String> characters = | |
| 240 * string.runes.map((c) => new String.fromCharCode(c)); | |
| 241 */ | |
| 242 @deprecated | |
| 243 List<String> splitChars(); | |
| 244 | |
| 245 /** | |
| 246 * Splits the string on the [pattern], then converts each part and each match. | 219 * Splits the string on the [pattern], then converts each part and each match. |
| 247 * | 220 * |
| 248 * The pattern is used to split the string into parts and separating matches. | 221 * The pattern is used to split the string into parts and separating matches. |
| 249 * | 222 * |
| 250 * Each match is converted to a string by calling [onMatch]. If [onMatch] | 223 * Each match is converted to a string by calling [onMatch]. If [onMatch] |
| 251 * is omitted, the matched string is used. | 224 * is omitted, the matched string is used. |
| 252 * | 225 * |
| 253 * Each non-matched part is converted by a call to [onNonMatch]. If | 226 * Each non-matched part is converted by a call to [onNonMatch]. If |
| 254 * [onNonMatch] is omitted, the non-matching part is used. | 227 * [onNonMatch] is omitted, the non-matching part is used. |
| 255 * | 228 * |
| 256 * Then all the converted parts are combined into the resulting string. | 229 * Then all the converted parts are combined into the resulting string. |
| 257 */ | 230 */ |
| 258 String splitMapJoin(Pattern pattern, | 231 String splitMapJoin(Pattern pattern, |
| 259 {String onMatch(Match match), | 232 {String onMatch(Match match), |
| 260 String onNonMatch(String nonMatch)}); | 233 String onNonMatch(String nonMatch)}); |
| 261 | 234 |
| 262 /** | 235 /** |
| 263 * Returns a list of UTF-16 code units of this string. | 236 * Returns an unmodifiable list of the UTF-16 code units of this string. |
| 264 * | |
| 265 * *This getter is deprecated. Use [codeUnits] instead.* | |
| 266 */ | 237 */ |
| 267 List<int> get charCodes; | 238 List<int> get codeUnits; |
| 268 | |
| 269 /** | |
| 270 * Returns an iterable of the UTF-16 code units of this string. | |
| 271 */ | |
| 272 // TODO(floitsch): should it return a list? | |
| 273 // TODO(floitsch): make it a bidirectional iterator. | |
| 274 Iterable<int> get codeUnits; | |
| 275 | 239 |
| 276 /** | 240 /** |
| 277 * Returns an iterable of Unicode code-points of this string. | 241 * Returns an iterable of Unicode code-points of this string. |
| 278 * | 242 * |
| 279 * If the string contains surrogate pairs, they will be combined and returned | 243 * If the string contains surrogate pairs, they will be combined and returned |
| 280 * as one integer by this iterator. Unmatched surrogate halves are treated | 244 * as one integer by this iterator. Unmatched surrogate halves are treated |
| 281 * like valid 16-bit code-units. | 245 * like valid 16-bit code-units. |
| 282 */ | 246 */ |
| 283 Runes get runes; | 247 Runes get runes; |
| 284 | 248 |
| (...skipping 19 matching lines...) Expand all Loading... |
| 304 final String string; | 268 final String string; |
| 305 Runes(this.string); | 269 Runes(this.string); |
| 306 | 270 |
| 307 RuneIterator get iterator => new RuneIterator(string); | 271 RuneIterator get iterator => new RuneIterator(string); |
| 308 | 272 |
| 309 int get last { | 273 int get last { |
| 310 if (string.length == 0) { | 274 if (string.length == 0) { |
| 311 throw new StateError("No elements."); | 275 throw new StateError("No elements."); |
| 312 } | 276 } |
| 313 int length = string.length; | 277 int length = string.length; |
| 314 int code = string.charCodeAt(length - 1); | 278 int code = string.codeUnitAt(length - 1); |
| 315 if (_isTrailSurrogate(code) && string.length > 1) { | 279 if (_isTrailSurrogate(code) && string.length > 1) { |
| 316 int previousCode = string.charCodeAt(length - 2); | 280 int previousCode = string.codeUnitAt(length - 2); |
| 317 if (_isLeadSurrogate(previousCode)) { | 281 if (_isLeadSurrogate(previousCode)) { |
| 318 return _combineSurrogatePair(previousCode, code); | 282 return _combineSurrogatePair(previousCode, code); |
| 319 } | 283 } |
| 320 } | 284 } |
| 321 return code; | 285 return code; |
| 322 } | 286 } |
| 323 | 287 |
| 324 } | 288 } |
| 325 | 289 |
| 326 // Is the code (a 16-bit unsigned integer) a UTF-16 lead surrogate. | 290 // Is the code (a 16-bit unsigned integer) a UTF-16 lead surrogate. |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 369 : string = string, _position = index, _nextPosition = index { | 333 : string = string, _position = index, _nextPosition = index { |
| 370 if (index < 0 || index > string.length) { | 334 if (index < 0 || index > string.length) { |
| 371 throw new RangeError.range(index, 0, string.length); | 335 throw new RangeError.range(index, 0, string.length); |
| 372 } | 336 } |
| 373 _checkSplitSurrogate(index); | 337 _checkSplitSurrogate(index); |
| 374 } | 338 } |
| 375 | 339 |
| 376 /** Throw an error if the index is in the middle of a surrogate pair. */ | 340 /** Throw an error if the index is in the middle of a surrogate pair. */ |
| 377 void _checkSplitSurrogate(int index) { | 341 void _checkSplitSurrogate(int index) { |
| 378 if (index > 0 && index < string.length && | 342 if (index > 0 && index < string.length && |
| 379 _isLeadSurrogate(string.charCodeAt(index - 1)) && | 343 _isLeadSurrogate(string.codeUnitAt(index - 1)) && |
| 380 _isTrailSurrogate(string.charCodeAt(index))) { | 344 _isTrailSurrogate(string.codeUnitAt(index))) { |
| 381 throw new ArgumentError("Index inside surrogate pair: $index"); | 345 throw new ArgumentError("Index inside surrogate pair: $index"); |
| 382 } | 346 } |
| 383 } | 347 } |
| 384 | 348 |
| 385 /** | 349 /** |
| 386 * Returns the starting position of the current rune in the string. | 350 * Returns the starting position of the current rune in the string. |
| 387 * | 351 * |
| 388 * Returns null if the [current] rune is null. | 352 * Returns null if the [current] rune is null. |
| 389 */ | 353 */ |
| 390 int get rawIndex => (_position != _nextPosition) ? _position : null; | 354 int get rawIndex => (_position != _nextPosition) ? _position : null; |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 448 if (_position + 1 == _nextPosition) return string[_position]; | 412 if (_position + 1 == _nextPosition) return string[_position]; |
| 449 return string.substring(_position, _nextPosition); | 413 return string.substring(_position, _nextPosition); |
| 450 } | 414 } |
| 451 | 415 |
| 452 bool moveNext() { | 416 bool moveNext() { |
| 453 _position = _nextPosition; | 417 _position = _nextPosition; |
| 454 if (_position == string.length) { | 418 if (_position == string.length) { |
| 455 _currentCodePoint = null; | 419 _currentCodePoint = null; |
| 456 return false; | 420 return false; |
| 457 } | 421 } |
| 458 int codeUnit = string.charCodeAt(_position); | 422 int codeUnit = string.codeUnitAt(_position); |
| 459 int nextPosition = _position + 1; | 423 int nextPosition = _position + 1; |
| 460 if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) { | 424 if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) { |
| 461 int nextCodeUnit = string.charCodeAt(nextPosition); | 425 int nextCodeUnit = string.codeUnitAt(nextPosition); |
| 462 if (_isTrailSurrogate(nextCodeUnit)) { | 426 if (_isTrailSurrogate(nextCodeUnit)) { |
| 463 _nextPosition = nextPosition + 1; | 427 _nextPosition = nextPosition + 1; |
| 464 _currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit); | 428 _currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit); |
| 465 return true; | 429 return true; |
| 466 } | 430 } |
| 467 } | 431 } |
| 468 _nextPosition = nextPosition; | 432 _nextPosition = nextPosition; |
| 469 _currentCodePoint = codeUnit; | 433 _currentCodePoint = codeUnit; |
| 470 return true; | 434 return true; |
| 471 } | 435 } |
| 472 | 436 |
| 473 bool movePrevious() { | 437 bool movePrevious() { |
| 474 _nextPosition = _position; | 438 _nextPosition = _position; |
| 475 if (_position == 0) { | 439 if (_position == 0) { |
| 476 _currentCodePoint = null; | 440 _currentCodePoint = null; |
| 477 return false; | 441 return false; |
| 478 } | 442 } |
| 479 int position = _position - 1; | 443 int position = _position - 1; |
| 480 int codeUnit = string.charCodeAt(position); | 444 int codeUnit = string.codeUnitAt(position); |
| 481 if (_isTrailSurrogate(codeUnit) && position > 0) { | 445 if (_isTrailSurrogate(codeUnit) && position > 0) { |
| 482 int prevCodeUnit = string.charCodeAt(position - 1); | 446 int prevCodeUnit = string.codeUnitAt(position - 1); |
| 483 if (_isLeadSurrogate(prevCodeUnit)) { | 447 if (_isLeadSurrogate(prevCodeUnit)) { |
| 484 _position = position - 1; | 448 _position = position - 1; |
| 485 _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); | 449 _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); |
| 486 return true; | 450 return true; |
| 487 } | 451 } |
| 488 } | 452 } |
| 489 _position = position; | 453 _position = position; |
| 490 _currentCodePoint = codeUnit; | 454 _currentCodePoint = codeUnit; |
| 491 return true; | 455 return true; |
| 492 } | 456 } |
| 493 } | 457 } |
| 494 | |
| 495 /** | |
| 496 * An [Iterable] of the UTF-16 code units of a [String] in index order. | |
| 497 */ | |
| 498 class CodeUnits extends ListIterable<int> { | |
| 499 /** The string that this is the code units of. */ | |
| 500 String string; | |
| 501 | |
| 502 CodeUnits(this.string); | |
| 503 | |
| 504 int get length => string.length; | |
| 505 int elementAt(int i) => string.codeUnitAt(i); | |
| 506 } | |
| OLD | NEW |