OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.core; | 5 part of dart.core; |
6 | 6 |
7 /** | 7 /** |
8 * The String class represents sequences of characters. Strings are | 8 * The String class represents sequences of characters. Strings are |
9 * immutable. A string is represented by a sequence of Unicode UTF-16 | 9 * immutable. A string is represented by a sequence of Unicode UTF-16 |
10 * code units accessible through the [codeUnitAt] or the | 10 * code units accessible through the [codeUnitAt] or the |
(...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
273 // TODO(floitsch): make it a bidirectional iterator. | 273 // TODO(floitsch): make it a bidirectional iterator. |
274 Iterable<int> get codeUnits; | 274 Iterable<int> get codeUnits; |
275 | 275 |
276 /** | 276 /** |
277 * Returns an iterable of Unicode code-points of this string. | 277 * Returns an iterable of Unicode code-points of this string. |
278 * | 278 * |
279 * If the string contains surrogate pairs, they will be combined and returned | 279 * If the string contains surrogate pairs, they will be combined and returned |
280 * as one integer by this iterator. Unmatched surrogate halves are treated | 280 * as one integer by this iterator. Unmatched surrogate halves are treated |
281 * like valid 16-bit code-units. | 281 * like valid 16-bit code-units. |
282 */ | 282 */ |
283 // TODO(floitsch): make it a Runes class. | 283 Runes get runes; |
284 Iterable<int> get runes; | |
285 | 284 |
286 /** | 285 /** |
287 * If this string is not already all lower case, returns a new string | 286 * If this string is not already all lower case, returns a new string |
288 * where all characters are made lower case. Returns [:this:] otherwise. | 287 * where all characters are made lower case. Returns [:this:] otherwise. |
289 */ | 288 */ |
290 // TODO(floitsch): document better. (See EcmaScript for description). | 289 // TODO(floitsch): document better. (See EcmaScript for description). |
291 String toLowerCase(); | 290 String toLowerCase(); |
292 | 291 |
293 /** | 292 /** |
294 * If this string is not already all upper case, returns a new string | 293 * If this string is not already all upper case, returns a new string |
295 * where all characters are made upper case. Returns [:this:] otherwise. | 294 * where all characters are made upper case. Returns [:this:] otherwise. |
296 */ | 295 */ |
297 // TODO(floitsch): document better. (See EcmaScript for description). | 296 // TODO(floitsch): document better. (See EcmaScript for description). |
298 String toUpperCase(); | 297 String toUpperCase(); |
299 } | 298 } |
| 299 |
/**
 * An [Iterable] view of the runes (Unicode code points) of a [String].
 *
 * Iterating yields the code points produced by a [RuneIterator] over
 * [string].
 */
class Runes extends Iterable<int> {
  /** The string whose runes are exposed by this iterable. */
  final String string;

  Runes(this.string);

  /** A fresh [RuneIterator] positioned at the start of [string]. */
  RuneIterator get iterator {
    return new RuneIterator(string);
  }

  /**
   * The last rune of [string], found by looking only at its final one or
   * two code units.
   *
   * Throws a [StateError] if [string] is empty.
   */
  int get last {
    final int unitCount = string.length;
    if (unitCount == 0) {
      throw new StateError("No elements.");
    }
    final int finalUnit = string.charCodeAt(unitCount - 1);
    // A single-unit string, or a final unit that is not a trail surrogate,
    // cannot end in a surrogate pair.
    if (unitCount == 1 || !_isTrailSurrogate(finalUnit)) {
      return finalUnit;
    }
    final int precedingUnit = string.charCodeAt(unitCount - 2);
    // An unmatched trail surrogate is returned as a plain code unit.
    return _isLeadSurrogate(precedingUnit)
        ? _combineSurrogatePair(precedingUnit, finalUnit)
        : finalUnit;
  }
}
| 325 |
// Whether [code] (a 16-bit unsigned integer) is a UTF-16 lead surrogate,
// i.e. its six most significant bits are 110110.
bool _isLeadSurrogate(int code) {
  final int topSixBits = code & 0xFC00;
  return topSixBits == 0xD800;
}
| 328 |
// Whether [code] (a 16-bit unsigned integer) is a UTF-16 trail surrogate,
// i.e. its six most significant bits are 110111.
bool _isTrailSurrogate(int code) {
  final int topSixBits = code & 0xFC00;
  return topSixBits == 0xDC00;
}
| 331 |
// Combine a lead surrogate [start] and a trail surrogate [end] into the
// single code point they encode. Each surrogate contributes its low ten
// bits; the result is offset by 0x10000.
int _combineSurrogatePair(int start, int end) {
  final int highTenBits = (start & 0x3FF) << 10;
  final int lowTenBits = end & 0x3FF;
  return 0x10000 + highTenBits + lowTenBits;
}
| 336 |
/**
 * [Iterator] for reading Unicode code points out of a Dart string.
 *
 * The iterator is positioned between code units of [string]. [moveNext] and
 * [movePrevious] step over one rune at a time: a lead surrogate directly
 * followed by a trail surrogate is combined into a single code point, and
 * any other code unit (including an unmatched surrogate) is returned as is.
 */
class RuneIterator implements BiDirectionalIterator<int> {
  /** String being iterated. */
  final String string;
  /** Position before the current code point. */
  int _position;
  /** Position after the current code point. */
  int _nextPosition;
  /**
   * Current code point.
   *
   * If the iterator has hit either end, the [_currentCodePoint] is null
   * and [: _position == _nextPosition :].
   */
  int _currentCodePoint;

  /** Create an iterator positioned at the beginning of the string. */
  RuneIterator(String string)
      : this.string = string, _position = 0, _nextPosition = 0;

  /**
   * Create an iterator positioned before the [index]th code unit of the string.
   *
   * When created, there is no [current] value.
   * A [moveNext] will use the rune starting at [index] as the current value,
   * and a [movePrevious] will use the rune ending just before [index] as
   * the current value.
   *
   * Throws a [RangeError] if [index] is negative or greater than
   * [:string.length:], and an [ArgumentError] if the [index] position is in
   * the middle of a surrogate pair.
   */
  RuneIterator.at(String string, int index)
      : string = string, _position = index, _nextPosition = index {
    if (index < 0 || index > string.length) {
      throw new RangeError.range(index, 0, string.length);
    }
    _checkSplitSurrogate(index);
  }

  /**
   * Throw an [ArgumentError] if the [index] is in the middle of a surrogate
   * pair, that is, directly after a lead surrogate and directly before a
   * trail surrogate.
   */
  void _checkSplitSurrogate(int index) {
    if (index > 0 && index < string.length &&
        _isLeadSurrogate(string.charCodeAt(index - 1)) &&
        _isTrailSurrogate(string.charCodeAt(index))) {
      throw new ArgumentError("Index inside surrogate pair: $index");
    }
  }

  /**
   * Returns the starting position of the current rune in the string.
   *
   * Returns null if the [current] rune is null.
   */
  int get rawIndex => (_position != _nextPosition) ? _position : null;

  /**
   * Resets the iterator to the rune at the specified index of the string.
   *
   * Setting a negative [rawIndex], or one greater than or equal to
   * [:string.length:], is an error. So is setting it in the middle of a
   * surrogate pair.
   *
   * Because a valid index is always strictly less than [:string.length:],
   * there is a rune starting at that position and [current] is non-null
   * afterwards. Use [reset] to position the iterator at the end of the
   * string.
   */
  void set rawIndex(int rawIndex) {
    // Check both bounds here rather than leaving the negative case to
    // [reset], so that every out-of-range value is reported against the
    // same valid interval, 0 .. string.length - 1.
    if (rawIndex < 0 || rawIndex >= string.length) {
      throw new RangeError.range(rawIndex, 0, string.length - 1);
    }
    reset(rawIndex);
    // Cannot fail: rawIndex < string.length, so a rune starts there.
    moveNext();
  }

  /**
   * Resets the iterator to the given index into the string.
   *
   * After this the [current] value is unset.
   * You must call [moveNext] to make the rune at the position current,
   * or [movePrevious] for the last rune before the position.
   *
   * Setting a negative [rawIndex], or one greater than [:string.length:],
   * is an error. So is setting it in the middle of a surrogate pair.
   */
  void reset([int rawIndex = 0]) {
    if (rawIndex < 0 || rawIndex > string.length) {
      throw new RangeError.range(rawIndex, 0, string.length);
    }
    _checkSplitSurrogate(rawIndex);
    _position = _nextPosition = rawIndex;
    _currentCodePoint = null;
  }

  /**
   * The rune starting at the current position in the string.
   *
   * Null if there is no current rune.
   */
  int get current => _currentCodePoint;

  /**
   * The number of code units comprising the current rune.
   *
   * Returns zero if there is no current rune ([current] is null).
   */
  int get currentSize => _nextPosition - _position;

  /**
   * A string containing the current rune.
   *
   * For runes outside the basic multilingual plane, this will be
   * a two-character String.
   *
   * Returns null if [current] is null.
   */
  String get currentAsString {
    if (_position == _nextPosition) return null;
    if (_position + 1 == _nextPosition) return string[_position];
    return string.substring(_position, _nextPosition);
  }

  /**
   * Moves forward to the rune starting where the current one ends.
   *
   * Returns false, and sets [current] to null, if the iterator is already
   * at the end of the string. Otherwise makes the next rune current and
   * returns true.
   */
  bool moveNext() {
    _position = _nextPosition;
    if (_position == string.length) {
      _currentCodePoint = null;
      return false;
    }
    int codeUnit = string.charCodeAt(_position);
    int nextPosition = _position + 1;
    if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) {
      int nextCodeUnit = string.charCodeAt(nextPosition);
      if (_isTrailSurrogate(nextCodeUnit)) {
        // Well-formed pair: consume both code units as one rune.
        _nextPosition = nextPosition + 1;
        _currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit);
        return true;
      }
    }
    // Plain code unit or unmatched surrogate: consume a single code unit.
    _nextPosition = nextPosition;
    _currentCodePoint = codeUnit;
    return true;
  }

  /**
   * Moves backward to the rune ending where the current one starts.
   *
   * Returns false, and sets [current] to null, if the iterator is already
   * at the start of the string. Otherwise makes the previous rune current
   * and returns true.
   */
  bool movePrevious() {
    _nextPosition = _position;
    if (_position == 0) {
      _currentCodePoint = null;
      return false;
    }
    int position = _position - 1;
    int codeUnit = string.charCodeAt(position);
    if (_isTrailSurrogate(codeUnit) && position > 0) {
      int prevCodeUnit = string.charCodeAt(position - 1);
      if (_isLeadSurrogate(prevCodeUnit)) {
        // Well-formed pair: step back over both code units as one rune.
        _position = position - 1;
        _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit);
        return true;
      }
    }
    // Plain code unit or unmatched surrogate: step back a single code unit.
    _position = position;
    _currentCodePoint = codeUnit;
    return true;
  }
}
OLD | NEW |