OLD | NEW |
---|---|
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.core; | 5 part of dart.core; |
6 | 6 |
7 /** | 7 /** |
8 * The String class represents sequences of characters. Strings are | 8 * The String class represents sequences of characters. Strings are |
9 * immutable. A string is represented by a sequence of Unicode UTF-16 | 9 * immutable. A string is represented by a sequence of Unicode UTF-16 |
10 * code units accessible through the [codeUnitAt] or the | 10 * code units accessible through the [codeUnitAt] or the |
(...skipping 25 matching lines...) Expand all Loading... | |
36 * | 36 * |
37 * It is allowed (though generally discouraged) to create a String with only | 37 * It is allowed (though generally discouraged) to create a String with only |
38 * one half of a surrogate pair. | 38 * one half of a surrogate pair. |
39 */ | 39 */ |
40 factory String.fromCharCode(int charCode) { | 40 factory String.fromCharCode(int charCode) { |
41 List<int> charCodes = new List<int>.filled(1, charCode); | 41 List<int> charCodes = new List<int>.filled(1, charCode); |
42 return new String.fromCharCodes(charCodes); | 42 return new String.fromCharCodes(charCodes); |
43 } | 43 } |
44 | 44 |
45 /** | 45 /** |
46 * Gets the character (as [String]) at the given [index]. | 46 * Gets the character (as a single-code-unit [String]) at the given [index]. |
47 * | 47 * |
48 * The returned string represents exactly one UTF-16 code unit which may be | 48 * The returned string represents exactly one UTF-16 code unit which may be |
49 * half of a surrogate pair. For example the Unicode character for a | 49 * half of a surrogate pair. For example the Unicode character for a |
50 * musical G-clef ("𝄞") with rune value 0x1D11E consists of a UTF-16 surrogate | 50 * musical G-clef ("𝄞") with rune value 0x1D11E consists of a UTF-16 surrogate |
51 * pair: `0xD834` and `0xDD1E`. Using the index-operator on this string yields | 51 * pair: `0xD834` and `0xDD1E`. Using the index-operator on this string yields |
52 * a String with half of a surrogate pair: | 52 * a String with half of a surrogate pair: |
53 * | 53 * |
54 * var clef = "\u{1D11E}"; | 54 * var clef = "\u{1D11E}"; |
55 * clef.length; // => 2 | 55 * clef.length; // => 2 |
56 * clef.runes.first == 0x1D11E; // => true | 56 * clef.runes.first == 0x1D11E; // => true |
(...skipping 20 matching lines...) Expand all Loading... | |
77 * | 77 * |
78 * Returns the number of UTF-16 code units in this string. The number | 78 * Returns the number of UTF-16 code units in this string. The number |
79 * of [runes] might be less, if the string contains characters outside | 79 * of [runes] might be less, if the string contains characters outside |
80 * the basic multilingual plane (plane 0). | 80 * the basic multilingual plane (plane 0). |
81 */ | 81 */ |
82 int get length; | 82 int get length; |
83 | 83 |
84 /** | 84 /** |
85 * Returns whether the two strings are equal. | 85 * Returns whether the two strings are equal. |
86 * | 86 * |
87 * This method compares each individual code unit of the strings. It does not | 87 * This method compares each individual code unit of the strings. |
88 * check for Unicode equivalence. For example the two following strings both | 88 * Equivalently (for strings that are well-formed UTF-16) it compares each |
89 * represent the string "Amélie" but, due to their different encoding will | 89 * individual rune (code point). It does not check for Unicode equivalence. |
90 * not return equal. | 90 * For example the two following strings both represent the string "Amélie" |
91 * but, due to their different encoding, will not return equal. | |
91 * | 92 * |
92 * "Am\xe9lie" | 93 * "Am\xe9lie" |
93 * "Ame\u{301}lie" | 94 * "Ame\u{301}lie" |
94 * | 95 * |
95 * In the first string the "é" is encoded as a single Unicode code unit, | 96 * In the first string the "é" is encoded as a single Unicode code unit (also |
96 * whereas the second string encodes it as "e" with the combining | 97 * a single rune), whereas the second string encodes it as "e" with the |
97 * accent character "◌́". | 98 * combining accent character "◌́". |
98 */ | 99 */ |
99 bool operator ==(var other); | 100 bool operator ==(var other); |
100 | 101 |
101 /** | 102 /** |
102 * Returns whether this string ends with [other]. | 103 * Returns whether this string ends with [other]. |
103 */ | 104 */ |
104 bool endsWith(String other); | 105 bool endsWith(String other); |
105 | 106 |
106 /** | 107 /** |
107 * Returns whether this string starts with [other]. | 108 * Returns whether this string starts with [other]. |
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
257 | 258 |
258 /** | 259 /** |
259 * If this string is not already all upper case, returns a new string | 260 * If this string is not already all upper case, returns a new string |
260 * where all characters are made upper case. Returns [:this:] otherwise. | 261 * where all characters are made upper case. Returns [:this:] otherwise. |
261 */ | 262 */ |
262 // TODO(floitsch): document better. (See EcmaScript for description). | 263 // TODO(floitsch): document better. (See EcmaScript for description). |
263 String toUpperCase(); | 264 String toUpperCase(); |
264 } | 265 } |
265 | 266 |
266 /** | 267 /** |
267 * The runes of a [String]. | 268 * The runes (21 bit integer Unicode code points) of a [String]. |
268 */ | 269 */ |
269 class Runes extends Iterable<int> { | 270 class Runes extends Iterable<int> { |
270 final String string; | 271 final String string; |
271 Runes(this.string); | 272 Runes(this.string); |
272 | 273 |
273 RuneIterator get iterator => new RuneIterator(string); | 274 RuneIterator get iterator => new RuneIterator(string); |
274 | 275 |
275 int get last { | 276 int get last { |
276 if (string.length == 0) { | 277 if (string.length == 0) { |
277 throw new StateError("No elements."); | 278 throw new StateError("No elements."); |
(...skipping 15 matching lines...) Expand all Loading... | |
293 bool _isLeadSurrogate(int code) => (code & 0xFC00) == 0xD800; | 294 bool _isLeadSurrogate(int code) => (code & 0xFC00) == 0xD800; |
294 | 295 |
295 // Is the code (a 16-bit unsigned integer) a UTF-16 trail surrogate. | 296 // Is the code (a 16-bit unsigned integer) a UTF-16 trail surrogate. |
296 bool _isTrailSurrogate(int code) => (code & 0xFC00) == 0xDC00; | 297 bool _isTrailSurrogate(int code) => (code & 0xFC00) == 0xDC00; |
297 | 298 |
298 // Combine a lead and a trail surrogate value into a single code point. | 299 // Combine a lead and a trail surrogate value into a single code point. |
299 int _combineSurrogatePair(int start, int end) { | 300 int _combineSurrogatePair(int start, int end) { |
300 return 0x10000 + ((start & 0x3FF) << 10) + (end & 0x3FF); | 301 return 0x10000 + ((start & 0x3FF) << 10) + (end & 0x3FF); |
301 } | 302 } |
302 | 303 |
303 /** [Iterator] for reading Unicode code points out of a Dart string. */ | 304 /** [Iterator] for reading runes (21 bit integer Unicode code points) out of a |
Lasse Reichstein Nielsen
2013/02/28 12:31:21
You can drop "21 bit integer" if you want, that's
| |
305 * Dart string. | |
306 */ | |
304 class RuneIterator implements BidirectionalIterator<int> { | 307 class RuneIterator implements BidirectionalIterator<int> { |
305 /** String being iterated. */ | 308 /** String being iterated. */ |
306 final String string; | 309 final String string; |
307 /** Position before the current code point. */ | 310 /** Position before the current code point. */ |
308 int _position; | 311 int _position; |
309 /** Position after the current code point. */ | 312 /** Position after the current code point. */ |
310 int _nextPosition; | 313 int _nextPosition; |
311 /** | 314 /** |
312 * Current code point. | 315 * Current code point. |
313 * | 316 * |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
384 */ | 387 */ |
385 void reset([int rawIndex = 0]) { | 388 void reset([int rawIndex = 0]) { |
386 if (rawIndex < 0 || rawIndex > string.length) { | 389 if (rawIndex < 0 || rawIndex > string.length) { |
387 throw new RangeError.range(rawIndex, 0, string.length); | 390 throw new RangeError.range(rawIndex, 0, string.length); |
388 } | 391 } |
389 _checkSplitSurrogate(rawIndex); | 392 _checkSplitSurrogate(rawIndex); |
390 _position = _nextPosition = rawIndex; | 393 _position = _nextPosition = rawIndex; |
391 _currentCodePoint = null; | 394 _currentCodePoint = null; |
392 } | 395 } |
393 | 396 |
394 /** The rune starting at the current position in the string. */ | 397 /** The rune (21 bit integer Unicode code point) starting at the current |
Lasse Reichstein Nielsen
2013/02/28 12:31:21
Again.
| |
398 * position in the string. */ | |
395 int get current => _currentCodePoint; | 399 int get current => _currentCodePoint; |
396 | 400 |
397 /** | 401 /** |
398 * The number of code units comprising the current rune. | 402 * The number of code units comprising the current rune. |
399 * | 403 * |
400 * Returns zero if there is no current rune ([current] is null). | 404 * Returns zero if there is no current rune ([current] is null). |
401 */ | 405 */ |
402 int get currentSize => _nextPosition - _position; | 406 int get currentSize => _nextPosition - _position; |
403 | 407 |
404 /** | 408 /** |
405 * A string containing the current rune. | 409 * A string containing the current rune. |
406 * | 410 * |
407 * For runes outside the basic multilingual plane, this will be | 411 * For runes outside the basic multilingual plane, this will be |
408 * a two-character String. | 412 * a String of length 2, containing two code units. |
409 * | 413 * |
410 * Returns null if [current] is null. | 414 * Returns null if [current] is null. |
411 */ | 415 */ |
412 String get currentAsString { | 416 String get currentAsString { |
413 if (_position == _nextPosition) return null; | 417 if (_position == _nextPosition) return null; |
414 if (_position + 1 == _nextPosition) return string[_position]; | 418 if (_position + 1 == _nextPosition) return string[_position]; |
415 return string.substring(_position, _nextPosition); | 419 return string.substring(_position, _nextPosition); |
416 } | 420 } |
417 | 421 |
418 bool moveNext() { | 422 bool moveNext() { |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
450 _position = position - 1; | 454 _position = position - 1; |
451 _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); | 455 _currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit); |
452 return true; | 456 return true; |
453 } | 457 } |
454 } | 458 } |
455 _position = position; | 459 _position = position; |
456 _currentCodePoint = codeUnit; | 460 _currentCodePoint = codeUnit; |
457 return true; | 461 return true; |
458 } | 462 } |
459 } | 463 } |
OLD | NEW |