Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of dart.core; | 5 part of dart.core; |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * The String class represents sequences of characters. Strings are | 8 * The String class represents sequences of characters. Strings are |
| 9 * immutable. A string is represented by a sequence of Unicode UTF-16 | 9 * immutable. A string is represented by a sequence of Unicode UTF-16 |
| 10 * code units accessible through the [codeUnitAt] or the | 10 * code units accessible through the [codeUnitAt] or the |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 36 * | 36 * |
| 37 * It is allowed (though generally discouraged) to create a String with only | 37 * It is allowed (though generally discouraged) to create a String with only |
| 38 * one half of a surrogate pair. | 38 * one half of a surrogate pair. |
| 39 */ | 39 */ |
| 40 factory String.fromCharCode(int charCode) { | 40 factory String.fromCharCode(int charCode) { |
| 41 List<int> charCodes = new List<int>.filled(1, charCode); | 41 List<int> charCodes = new List<int>.filled(1, charCode); |
| 42 return new String.fromCharCodes(charCodes); | 42 return new String.fromCharCodes(charCodes); |
| 43 } | 43 } |
| 44 | 44 |
| 45 /** | 45 /** |
| 46 * Gets the character (as [String]) at the given [index]. | 46 * Gets the character (as a single-code-unit [String]) at the given [index]. |
| 47 * | 47 * |
| 48 * The returned string represents exactly one UTF-16 code unit which may be | 48 * The returned string represents exactly one UTF-16 code unit which may be |
| 49 * half of a surrogate pair. For example the Unicode character for a | 49 * half of a surrogate pair. For example the Unicode character for a |
| 50 * musical G-clef ("𝄞") with rune value 0x1D11E consists of a UTF-16 surrogate | 50 * musical G-clef ("𝄞") with rune value 0x1D11E consists of a UTF-16 surrogate |
| 51 * pair: `0xD834` and `0xDD1E`. Using the index-operator on this string yields | 51 * pair: `0xD834` and `0xDD1E`. Using the index-operator on this string yields |
| 52 * a String with half of a surrogate pair: | 52 * a String with half of a surrogate pair: |
| 53 * | 53 * |
| 54 * var clef = "\u{1D11E}"; | 54 * var clef = "\u{1D11E}"; |
| 55 * clef.length; // => 2 | 55 * clef.length; // => 2 |
| 56 * clef.runes.first == 0x1D11E; // => true | 56 * clef.runes.first == 0x1D11E; // => true |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 77 * | 77 * |
| 78 * Returns the number of UTF-16 code units in this string. The number | 78 * Returns the number of UTF-16 code units in this string. The number |
| 79 * of [runes] might be less, if the string contains characters outside | 79 * of [runes] might be less, if the string contains characters outside |
| 80 * the basic multilingual plane (plane 0). | 80 * the basic multilingual plane (plane 0). |
| 81 */ | 81 */ |
| 82 int get length; | 82 int get length; |
| 83 | 83 |
| 84 /** | 84 /** |
| 85 * Returns whether the two strings are equal. | 85 * Returns whether the two strings are equal. |
| 86 * | 86 * |
| 87 * This method compares each individual code unit of the strings. It does not | 87 * This method compares each individual code unit of the strings. |
| 88 * check for Unicode equivalence. For example the two following strings both | 88 * Equivalently (for strings that are well-formed UTF-16) it compares each |
| 89 * represent the string "Amélie" but, due to their different encoding, will | 89 * individual rune (code point). It does not check for Unicode equivalence. |
| 90 * not compare equal. | 90 * For example the two following strings both represent the string "Amélie" |
| 91 * but, due to their different encoding, will not compare equal. | |
| 91 * | 92 * |
| 92 * "Am\xe9lie" | 93 * "Am\xe9lie" |
| 93 * "Ame\u{301}lie" | 94 * "Ame\u{301}lie" |
| 94 * | 95 * |
| 95 * In the first string the "é" is encoded as a single Unicode code unit, | 96 * In the first string the "é" is encoded as a single Unicode code unit (also |
| 96 * whereas the second string encodes it as "e" with the combining | 97 * a single rune), whereas the second string encodes it as "e" with the |
| 97 * accent character "◌́". | 98 * combining accent character "◌́". |
| 98 */ | 99 */ |
| 99 bool operator ==(var other); | 100 bool operator ==(var other); |
| 100 | 101 |
| 101 /** | 102 /** |
| 102 * Returns whether this string ends with [other]. | 103 * Returns whether this string ends with [other]. |
| 103 */ | 104 */ |
| 104 bool endsWith(String other); | 105 bool endsWith(String other); |
| 105 | 106 |
| 106 /** | 107 /** |
| 107 * Returns whether this string starts with [other]. | 108 * Returns whether this string starts with [other]. |
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 257 | 258 |
| 258 /** | 259 /** |
| 259 * If this string is not already all upper case, returns a new string | 260 * If this string is not already all upper case, returns a new string |
| 260 * where all characters are made upper case. Returns [:this:] otherwise. | 261 * where all characters are made upper case. Returns [:this:] otherwise. |
| 261 */ | 262 */ |
| 262 // TODO(floitsch): document better. (See EcmaScript for description). | 263 // TODO(floitsch): document better. (See EcmaScript for description). |
| 263 String toUpperCase(); | 264 String toUpperCase(); |
| 264 } | 265 } |
| 265 | 266 |
| 266 /** | 267 /** |
| 267 * The runes of a [String]. | 268 * The runes (21 bit integer Unicode code points) of a [String]. |
| 268 */ | 269 */ |
| 269 class Runes extends Iterable<int> { | 270 class Runes extends Iterable<int> { |
| 270 final String string; | 271 final String string; |
| 271 Runes(this.string); | 272 Runes(this.string); |
| 272 | 273 |
| 273 RuneIterator get iterator => new RuneIterator(string); | 274 RuneIterator get iterator => new RuneIterator(string); |
| 274 | 275 |
| 275 int get last { | 276 int get last { |
| 276 if (string.length == 0) { | 277 if (string.length == 0) { |
| 277 throw new StateError("No elements."); | 278 throw new StateError("No elements."); |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 293 bool _isLeadSurrogate(int code) => (code & 0xFC00) == 0xD800; | 294 bool _isLeadSurrogate(int code) => (code & 0xFC00) == 0xD800; |
| 294 | 295 |
| 295 // Is the code (a 16-bit unsigned integer) a UTF-16 trail surrogate. | 296 // Is the code (a 16-bit unsigned integer) a UTF-16 trail surrogate. |
| 296 bool _isTrailSurrogate(int code) => (code & 0xFC00) == 0xDC00; | 297 bool _isTrailSurrogate(int code) => (code & 0xFC00) == 0xDC00; |
| 297 | 298 |
| 298 // Combine a lead and a trail surrogate value into a single code point. | 299 // Combine a lead and a trail surrogate value into a single code point. |
| 299 int _combineSurrogatePair(int start, int end) { | 300 int _combineSurrogatePair(int start, int end) { |
| 300 return 0x10000 + ((start & 0x3FF) << 10) + (end & 0x3FF); | 301 return 0x10000 + ((start & 0x3FF) << 10) + (end & 0x3FF); |
| 301 } | 302 } |
| 302 | 303 |
| 303 /** [Iterator] for reading Unicode code points out of a Dart string. */ | 304 /** [Iterator] for reading runes (21 bit integer Unicode code points) out of a |
|
Lasse Reichstein Nielsen
2013/02/28 12:31:21
You can drop "21 bit integer" if you want, that's
| |
| 305 * Dart string. | |
| 306 */ | |
| 304 class RuneIterator implements BidirectionalIterator<int> { | 307 class RuneIterator implements BidirectionalIterator<int> { |
| 305 /** String being iterated. */ | 308 /** String being iterated. */ |
| 306 final String string; | 309 final String string; |
| 307 /** Position before the current code point. */ | 310 /** Position before the current code point. */ |
| 308 int _position; | 311 int _position; |
| 309 /** Position after the current code point. */ | 312 /** Position after the current code point. */ |
| 310 int _nextPosition; | 313 int _nextPosition; |
| 311 /** | 314 /** |
| 312 * Current code point. | 315 * Current code point. |
| 313 * | 316 * |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 384 */ | 387 */ |
| 385 void reset([int rawIndex = 0]) { | 388 void reset([int rawIndex = 0]) { |
| 386 if (rawIndex < 0 || rawIndex > string.length) { | 389 if (rawIndex < 0 || rawIndex > string.length) { |
| 387 throw new RangeError.range(rawIndex, 0, string.length); | 390 throw new RangeError.range(rawIndex, 0, string.length); |
| 388 } | 391 } |
| 389 _checkSplitSurrogate(rawIndex); | 392 _checkSplitSurrogate(rawIndex); |
| 390 _position = _nextPosition = rawIndex; | 393 _position = _nextPosition = rawIndex; |
| 391 _currentCodePoint = null; | 394 _currentCodePoint = null; |
| 392 } | 395 } |
| 393 | 396 |
| 394 /** The rune starting at the current position in the string. */ | 397 /** The rune (21 bit integer Unicode code point) starting at the current |
|
Lasse Reichstein Nielsen
2013/02/28 12:31:21
Again.
| |
| 398 * position in the string. */ | |
| 395 int get current => _currentCodePoint; | 399 int get current => _currentCodePoint; |
| 396 | 400 |
| 397 /** | 401 /** |
| 398 * The number of code units comprising the current rune. | 402 * The number of code units comprising the current rune. |
| 399 * | 403 * |
| 400 * Returns zero if there is no current rune ([current] is null). | 404 * Returns zero if there is no current rune ([current] is null). |
| 401 */ | 405 */ |
| 402 int get currentSize => _nextPosition - _position; | 406 int get currentSize => _nextPosition - _position; |
| 403 | 407 |
| 404 /** | 408 /** |
| 405 * A string containing the current rune. | 409 * A string containing the current rune. |
| 406 * | 410 * |
| 407 * For runes outside the basic multilingual plane, this will be | 411 * For runes outside the basic multilingual plane, this will be |
| 408 * a two-character String. | 412 * a String of length 2, containing two code units. |
| 409 * | 413 * |
| 410 * Returns null if [current] is null. | 414 * Returns null if [current] is null. |
| 411 */ | 415 */ |
| 412 String get currentAsString { | 416 String get currentAsString { |
| 413 if (_position == _nextPosition) return null; | 417 if (_position == _nextPosition) return null; |
| 414 if (_position + 1 == _nextPosition) return string[_position]; | 418 if (_position + 1 == _nextPosition) return string[_position]; |
| 415 return string.substring(_position, _nextPosition); | 419 return string.substring(_position, _nextPosition); |
| 416 } | 420 } |
| 417 | 421 |
| 418 bool moveNext() { | 422 bool moveNext() { |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 450 _position = position - 1; | 454 _position = position - 1; |
| 451 _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); | 455 _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); |
| 452 return true; | 456 return true; |
| 453 } | 457 } |
| 454 } | 458 } |
| 455 _position = position; | 459 _position = position; |
| 456 _currentCodePoint = codeUnit; | 460 _currentCodePoint = codeUnit; |
| 457 return true; | 461 return true; |
| 458 } | 462 } |
| 459 } | 463 } |
| OLD | NEW |