Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of dart.core; | 5 part of dart.core; |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * The String class represents sequences of characters. Strings are | 8 * The String class represents sequences of characters. Strings are |
| 9 * immutable. A string is represented by a sequence of Unicode UTF-16 | 9 * immutable. A string is represented by a sequence of Unicode UTF-16 |
| 10 * code units accessible through the [codeUnitAt] or the | 10 * code units accessible through the [codeUnitAt] or the |
| (...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 273 // TODO(floitsch): make it a bidirectional iterator. | 273 // TODO(floitsch): make it a bidirectional iterator. |
| 274 Iterable<int> get codeUnits; | 274 Iterable<int> get codeUnits; |
| 275 | 275 |
| 276 /** | 276 /** |
| 277 * Returns an iterable of Unicode code-points of this string. | 277 * Returns an iterable of Unicode code-points of this string. |
| 278 * | 278 * |
| 279 * If the string contains surrogate pairs, they will be combined and returned | 279 * If the string contains surrogate pairs, they will be combined and returned |
| 280 * as one integer by this iterator. Unmatched surrogate halves are treated | 280 * as one integer by this iterator. Unmatched surrogate halves are treated |
| 281 * like valid 16-bit code-units. | 281 * like valid 16-bit code-units. |
| 282 */ | 282 */ |
| 283 // TODO(floitsch): make it a Runes class. | 283 Runes get runes; |
| 284 Iterable<int> get runes; | |
| 285 | 284 |
| 286 /** | 285 /** |
| 287 * If this string is not already all lower case, returns a new string | 286 * If this string is not already all lower case, returns a new string |
| 288 * where all characters are made lower case. Returns [:this:] otherwise. | 287 * where all characters are made lower case. Returns [:this:] otherwise. |
| 289 */ | 288 */ |
| 290 // TODO(floitsch): document better. (See EcmaScript for description). | 289 // TODO(floitsch): document better. (See EcmaScript for description). |
| 291 String toLowerCase(); | 290 String toLowerCase(); |
| 292 | 291 |
| 293 /** | 292 /** |
| 294 * If this string is not already all upper case, returns a new string | 293 * If this string is not already all upper case, returns a new string |
| 295 * where all characters are made upper case. Returns [:this:] otherwise. | 294 * where all characters are made upper case. Returns [:this:] otherwise. |
| 296 */ | 295 */ |
| 297 // TODO(floitsch): document better. (See EcmaScript for description). | 296 // TODO(floitsch): document better. (See EcmaScript for description). |
| 298 String toUpperCase(); | 297 String toUpperCase(); |
| 299 } | 298 } |
| 299 | |
| 300 /** | |
| 301 * The runes of a [String]. | |
| 302 */ | |
| 303 class Runes extends Iterable<int> { | |
| 304 final String string; | |
| 305 Runes(this.string); | |
| 306 | |
| 307 RuneIterator get iterator => new RuneIterator(string); | |
| 308 | |
| 309 int get first { | |
|
floitsch
2013/02/12 14:23:10
You think it's worth specializing "first" ?
Lasse Reichstein Nielsen
2013/02/12 15:13:23
Probably not. Let's drop it.
| |
| 310 if (string.length == 0) { | |
| 311 throw new StateError("No elements."); | |
| 312 } | |
| 313 int code = string.charCodeAt(0); | |
| 314 if (_isLeadSurrogate(code) && string.length > 1) { | |
| 315 int nextCode = string.charCodeAt(1); | |
| 316 if (_isTrailSurrogate(nextCode)) { | |
| 317 return _combineSurrogatePair(code, nextCode); | |
| 318 } | |
| 319 } | |
| 320 return code; | |
| 321 } | |
| 322 | |
| 323 int get last { | |
| 324 if (string.length == 0) { | |
| 325 throw new StateError("No elements."); | |
| 326 } | |
| 327 int length = string.length; | |
| 328 int code = string.charCodeAt(length - 1); | |
| 329 if (_isTrailSurrogate(code) && string.length > 1) { | |
| 330 int previousCode = string.charCodeAt(length - 2); | |
| 331 if (_isLeadSurrogate(previousCode)) { | |
| 332 return _combineSurrogatePair(previousCode, code); | |
| 333 } | |
| 334 } | |
| 335 return code; | |
| 336 } | |
| 337 | |
| 338 } | |
| 339 | |
| 340 // Is the code (a 16-bit unsigned integer) a UTF-16 lead surrogate? | |
| 341 bool _isLeadSurrogate(int code) => (code & 0xFC00) == 0xD800; | |
| 342 | |
| 343 // Is the code (a 16-bit unsigned integer) a UTF-16 trail surrogate? | |
| 344 bool _isTrailSurrogate(int code) => (code & 0xFC00) == 0xDC00; | |
| 345 | |
| 346 // Combine a lead and a trail surrogate value into a single code point. | |
| 347 int _combineSurrogatePair(int start, int end) { | |
| 348 return 0x10000 + ((start & 0x3FF) << 10) + (end & 0x3FF); | |
| 349 } | |
| 350 | |
| 351 /** [Iterator] for reading Unicode code points out of a Dart string. */ | |
| 352 class RuneIterator implements BiDirectionalIterator<int> { | |
| 353 /** String being iterated. */ | |
| 354 final String string; | |
| 355 /** Position before the current code point. */ | |
| 356 int _position; | |
| 357 /** Position after the current code point. */ | |
| 358 int _nextPosition; | |
| 359 /** | |
| 360 * Current code point. | |
| 361 * | |
| 362 * If the iterator has hit either end, the [_currentCodePoint] is null | |
| 363 * and [: _position == _nextPosition :]. | |
| 364 */ | |
| 365 int _currentCodePoint; | |
| 366 | |
| 367 /** Create an iterator positioned at the beginning of the string. */ | |
| 368 RuneIterator(String string) | |
| 369 : string = string, _position = 0, _nextPosition = 0; | |
|
floitsch
2013/02/12 14:23:10
not necessary, but reads better with "this.string
Lasse Reichstein Nielsen
2013/02/12 15:13:23
Done.
| |
| 370 | |
| 371 /** | |
| 372 * Create an iterator positioned before the [index]th code unit of the string. | |
| 373 * | |
| 374 * A [moveNext] will make the following code point the current value, and a | |
|
floitsch
2013/02/12 14:23:10
don't use "code point", but "rune".
Not perfect ei
Lasse Reichstein Nielsen
2013/02/12 15:13:23
Reworded.
| |
| 375 * [movePrevious] will make the preceding code point the current value. | |
|
floitsch
2013/02/12 14:23:10
point.
Lasse Reichstein Nielsen
2013/02/12 15:13:23
Done.
| |
| 376 * | |
| 377 * It is an error if the [index] position is in the middle of a surrogate | |
| 378 * pair. | |
| 379 */ | |
| 380 RuneIterator.at(String string, int index) | |
| 381 : string = string, _position = index, _nextPosition = index { | |
| 382 if (index < 0 || index > string.length) { | |
| 383 throw new RangeError.range(index, 0, string.length); | |
| 384 } | |
| 385 _checkSplitSurrogate(index); | |
| 386 } | |
| 387 | |
| 388 /** Throw an error if the index is in the middle of a surrogate pair. */ | |
| 389 void _checkSplitSurrogate(int index) { | |
| 390 if (index > 0 && index < string.length && | |
| 391 _isLeadSurrogate(string.charCodeAt(index - 1)) && | |
| 392 _isTrailSurrogate(string.charCodeAt(index))) { | |
| 393 throw new ArgumentError("Index inside surrogate pair: $index"); | |
| 394 } | |
| 395 } | |
| 396 | |
| 397 /** | |
| 398 * Returns the starting position of the current rune in the string. | |
| 399 * | |
| 400 * If the current rune is null, this is the index of the rune that | |
|
floitsch
2013/02/12 14:23:10
I would prefer if rawIndex was null or -1 then.
Lasse Reichstein Nielsen
2013/02/12 15:13:23
This way, someone can give you an uninitialized it
Lasse Reichstein Nielsen
2013/02/12 15:20:23
I've changed it to return null anyway, if there is
| |
| 401 * will become current after a call to [moveNext]. | |
| 402 */ | |
| 403 int get rawIndex => _position; | |
| 404 | |
| 405 /** | |
| 406 * Resets the iterator to the rune at the specified index of the string. | |
| 407 * | |
| 408 * Setting a negative [rawIndex], or one greater than [:string.length:], | |
|
floitsch
2013/02/12 14:23:10
If we allow "string.length" we should maybe allow
Lasse Reichstein Nielsen
2013/02/12 15:13:23
I'd disallow string.length then.
Positions in st
floitsch
2013/02/13 10:12:19
*Nobody* except the implementors see it this way.
| |
| 409 * is an error. So is setting it in the middle of a surrogate pair. | |
| 410 * | |
| 411 * Setting the position to the end of the string will set [current] to null. | |
| 412 */ | |
| 413 void set rawIndex(int rawIndex) { | |
| 414 reset(rawIndex); | |
| 415 moveNext(); | |
| 416 } | |
| 417 | |
| 418 /** | |
| 419 * Resets the iterator to the given index into the string. | |
| 420 * | |
| 421 * After this the [current] value is unset. | |
| 422 * You must call [moveNext] to make the rune at the position current, | |
| 423 * or [movePrevious] for the last rune before the position. | |
| 424 * | |
| 425 * Setting a negative [rawIndex], or one greater than [:string.length:], | |
|
floitsch
2013/02/12 14:23:10
ditto. Maybe we should allow "-1".
Lasse Reichstein Nielsen
2013/02/12 15:13:23
For what?
reset(0) is a reset to the beginning of
floitsch
2013/02/13 10:12:19
If I reset to (0) and then moveNext I don't expect
Lasse Reichstein Nielsen
2013/02/13 17:20:42
That's not how RuneIterator.reset works.
Are you
floitsch
2013/02/13 17:34:50
Ok for original behavior for reset (including allo
| |
| 426 * is an error. So is setting it in the middle of a surrogate pair. | |
| 427 */ | |
| 428 void reset([int rawIndex = 0]) { | |
| 429 if (rawIndex < 0 || rawIndex > string.length) { | |
| 430 throw new RangeError.range(rawIndex, 0, string.length); | |
| 431 } | |
| 432 _checkSplitSurrogate(rawIndex); | |
| 433 _position = _nextPosition = rawIndex; | |
| 434 _currentCodePoint = null; | |
| 435 } | |
| 436 | |
| 437 /** The rune starting at the current position in the string. */ | |
| 438 int get current => _currentCodePoint; | |
| 439 | |
| 440 /** | |
| 441 * The number of code units comprising the current rune. | |
| 442 * | |
| 443 * Returns zero if there is no current rune ([current] is null). | |
|
floitsch
2013/02/12 14:23:10
unfinished sentence.
Lasse Reichstein Nielsen
2013/02/12 15:13:23
Done.
| |
| 444 */ | |
| 445 int get currentSize => _nextPosition - _position; | |
| 446 | |
| 447 /** | |
| 448 * A string containing the current rune. | |
| 449 * | |
| 450 * For runes outside the basic multilingual plane, this will be | |
| 451 * a two-character String. | |
| 452 * | |
| 453 * Returns null if [current] is null. | |
| 454 */ | |
| 455 String get currentAsString { | |
| 456 if (_position == _nextPosition) return null; | |
| 457 if (_position + 1 == _nextPosition) return string[_position]; | |
| 458 return string.substring(_position, _nextPosition); | |
| 459 } | |
| 460 | |
| 461 | |
| 462 bool moveNext() { | |
| 463 _position = _nextPosition; | |
| 464 if (_nextPosition == string.length) { | |
|
floitsch
2013/02/12 14:23:10
_position. No need for _nextPosition anymore.
Lasse Reichstein Nielsen
2013/02/12 15:13:23
Done.
| |
| 465 _currentCodePoint = null; | |
| 466 return false; | |
| 467 } | |
| 468 int codeUnit = string.charCodeAt(_position); | |
| 469 int nextPosition = _position + 1; | |
| 470 if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) { | |
| 471 int nextCodeUnit = string.charCodeAt(nextPosition); | |
| 472 if (_isTrailSurrogate(nextCodeUnit)) { | |
| 473 _nextPosition = nextPosition + 1; | |
| 474 _currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit); | |
| 475 return true; | |
| 476 } | |
| 477 } | |
| 478 _nextPosition = nextPosition; | |
| 479 _currentCodePoint = codeUnit; | |
| 480 return true; | |
| 481 } | |
| 482 | |
| 483 bool movePrevious() { | |
| 484 _nextPosition = _position; | |
| 485 if (_position == 0) { | |
| 486 _currentCodePoint = null; | |
| 487 return false; | |
| 488 } | |
| 489 int position = _position - 1; | |
| 490 int codeUnit = string.charCodeAt(position); | |
| 491 if (_isTrailSurrogate(codeUnit) && position > 0) { | |
| 492 int prevCodeUnit = string.charCodeAt(position - 1); | |
| 493 if (_isLeadSurrogate(prevCodeUnit)) { | |
| 494 _position = position - 1; | |
| 495 _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); | |
| 496 return true; | |
| 497 } | |
| 498 } | |
| 499 _position = position; | |
| 500 _currentCodePoint = codeUnit; | |
| 501 return true; | |
| 502 } | |
| 503 } | |
| OLD | NEW |