OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 part of dart.core; | |
6 | |
7 /** | |
8 * A sequence of characters. | |
9 * | |
10 * A string can be either single or multiline. Single line strings are | |
11 * written using matching single or double quotes, and multiline strings are | |
12 * written using triple quotes. The following are all valid Dart strings: | |
13 * | |
14 * 'Single quotes'; | |
15 * "Double quotes"; | |
16 * 'Double quotes in "single" quotes'; | |
17 * "Single quotes in 'double' quotes"; | |
18 * | |
19 * '''A | |
20 * multiline | |
21 * string'''; | |
22 * | |
23 * """ | |
24 * Another | |
25 * multiline | |
26 * string"""; | |
27 * | |
28 * Strings are immutable. Although you cannot change a string, you can perform | |
29 * an operation on a string and assign the result to a new string: | |
30 * | |
31 * var string = 'Dart is fun'; | |
32 * var newString = string.substring(0, 5); | |
33 * | |
34 * You can use the plus (`+`) operator to concatenate strings: | |
35 * | |
36 * 'Dart ' + 'is ' + 'fun!'; // 'Dart is fun!' | |
37 * | |
38 * You can also use adjacent string literals for concatenation: | |
39 * | |
40 * 'Dart ' 'is ' 'fun!'; // 'Dart is fun!' | |
41 * | |
42 * You can use `${}` to interpolate the value of Dart expressions | |
43 * within strings. The curly braces can be omitted when evaluating identifiers: | |
44 * | |
45 * string = 'dartlang'; | |
46 * '$string has ${string.length} letters'; // 'dartlang has 8 letters' | |
47 * | |
48 * A string is represented by a sequence of Unicode UTF-16 code units | |
49 * accessible through the [codeUnitAt] or the [codeUnits] members: | |
50 * | |
51 * string = 'Dart'; | |
52 * string.codeUnitAt(0); // 68 | |
53 * string.codeUnits; // [68, 97, 114, 116] | |
54 * | |
55 * The string representation of code units is accessible through the index | |
56 * operator: | |
57 * | |
58 * string[0]; // 'D' | |
59 * | |
60 * The characters of a string are encoded in UTF-16. Decoding UTF-16, which | |
61 * combines surrogate pairs, yields Unicode code points. Following a similar | |
62 * terminology to Go, we use the name 'rune' for an integer representing a | |
63 * Unicode code point. Use the [runes] property to get the runes of a string: | |
64 * | |
65 * string.runes.toList(); // [68, 97, 114, 116] | |
66 * | |
67 * For a character outside the Basic Multilingual Plane (plane 0) that is | |
68 * composed of a surrogate pair, [runes] combines the pair and returns a | |
69 * single integer. For example, the Unicode character for a | |
70 * musical G-clef ('𝄞') with rune value 0x1D11E consists of a UTF-16 surrogate | |
71 * pair: `0xD834` and `0xDD1E`. Using [codeUnits] returns the surrogate pair, | |
72 * and using `runes` returns their combined value: | |
73 * | |
74 * var clef = '\u{1D11E}'; | |
75 * clef.codeUnits; // [0xD834, 0xDD1E] | |
76 * clef.runes.toList(); // [0x1D11E] | |
77 * | |
78 * The String class can not be extended or implemented. Attempting to do so | |
79 * yields a compile-time error. | |
80 * | |
81 * ## Other resources | |
82 * | |
83 * See [StringBuffer] to efficiently build a string incrementally. See | |
84 * [RegExp] to work with regular expressions. | |
85 * | |
86 * Also see: | |
87 * | |
88 * * [Dart Cookbook](https://www.dartlang.org/docs/cookbook/#strings) | |
89 * for String examples and recipes. | |
90 * * [Dart Up and Running](https://www.dartlang.org/docs/dart-up-and-running/ch03.html#strings-and-regular-expressions) | |
91 */ | |
abstract class String implements Comparable<String>, Pattern {
  /**
   * Allocates a new String for the specified [charCodes].
   *
   * The [charCodes] can be UTF-16 code units or runes. If a char-code value is
   * 16-bit, it is copied verbatim:
   *
   *     new String.fromCharCodes([68]); // 'D'
   *
   * If a char-code value is greater than 16-bits, it is decomposed into a
   * surrogate pair:
   *
   *     var clef = new String.fromCharCodes([0x1D11E]);
   *     clef.codeUnitAt(0); // 0xD834
   *     clef.codeUnitAt(1); // 0xDD1E
   *
   * If [start] and [end] are provided, only the values of [charCodes]
   * at positions from `start` to, but not including, `end`, are used.
   * The `start` and `end` values must satisfy
   * `0 <= start <= end <= charCodes.length`.
   */
  external factory String.fromCharCodes(Iterable<int> charCodes,
                                        [int start = 0, int end]);

  /**
   * Allocates a new String for the specified [charCode].
   *
   * If the [charCode] can be represented by a single UTF-16 code unit, the new
   * string contains a single code unit. Otherwise, the [length] is 2 and
   * the code units form a surrogate pair. See documentation for
   * [fromCharCodes].
   *
   * Creating a String with half of a surrogate pair is allowed.
   */
  external factory String.fromCharCode(int charCode);

  /**
   * Returns the string value of the environment declaration [name].
   *
   * Environment declarations are provided by the surrounding system compiling
   * or running the Dart program. Declarations map a string key to a string
   * value.
   *
   * If [name] is not declared in the environment, the result is instead
   * [defaultValue].
   *
   * Example of getting a value:
   *
   *     const String.fromEnvironment("defaultFloo", defaultValue: "no floo")
   *
   * Example of checking whether a declaration is there at all:
   *
   *     var isDeclared = const String.fromEnvironment("maybeDeclared") != null;
   */
  external const factory String.fromEnvironment(String name,
                                                {String defaultValue});

  /**
   * Gets the character (as a single-code-unit [String]) at the given [index].
   *
   * The returned string represents exactly one UTF-16 code unit, which may be
   * half of a surrogate pair. A single member of a surrogate pair is an
   * invalid UTF-16 string:
   *
   *     var clef = '\u{1D11E}';
   *     // These represent invalid UTF-16 strings.
   *     clef[0].codeUnits; // [0xD834]
   *     clef[1].codeUnits; // [0xDD1E]
   *
   * This method is equivalent to
   * `new String.fromCharCode(this.codeUnitAt(index))`.
   */
  String operator [](int index);

  /**
   * Returns the 16-bit UTF-16 code unit at the given [index].
   */
  int codeUnitAt(int index);

  /**
   * The length of the string.
   *
   * Returns the number of UTF-16 code units in this string. The number
   * of [runes] might be fewer, if the string contains characters outside
   * the Basic Multilingual Plane (plane 0):
   *
   *     'Dart'.length; // 4
   *     'Dart'.runes.length; // 4
   *
   *     var clef = '\u{1D11E}';
   *     clef.length; // 2
   *     clef.runes.length; // 1
   */
  int get length;

  /**
   * Returns a hash code derived from the code units of the string.
   *
   * This is compatible with [operator==]. Strings with the same sequence
   * of code units have the same hash code.
   */
  int get hashCode;

  /**
   * Returns true if [other] is a `String` with the same sequence of code
   * units.
   *
   * This method compares each individual code unit of the strings.
   * It does not check for Unicode equivalence.
   * For example, both the following strings represent the string 'Amélie',
   * but due to their different encoding, are not equal:
   *
   *     'Am\xe9lie' == 'Ame\u{301}lie'; // false
   *
   * The first string encodes 'é' as a single unicode code unit (also
   * a single rune), whereas the second string encodes it as 'e' with the
   * combining accent character '◌́'.
   */
  bool operator ==(Object other);

  /**
   * Returns true if this string ends with [other]. For example:
   *
   *     'Dart'.endsWith('t'); // true
   */
  bool endsWith(String other);

  /**
   * Returns true if this string starts with a match of [pattern].
   *
   *     var string = 'Dart';
   *     string.startsWith('D'); // true
   *     string.startsWith(new RegExp(r'[A-Z][a-z]')); // true
   *
   * If [index] is provided, this method checks if the substring starting
   * at that index starts with a match of [pattern]:
   *
   *     string.startsWith('art', 1); // true
   *     string.startsWith(new RegExp(r'\w{3}')); // true
   *
   * [index] must not be negative or greater than [length].
   *
   * A [RegExp] containing '^' does not match if the [index] is greater than
   * zero. The pattern works on the string as a whole, and does not extract
   * a substring starting at [index] first:
   *
   *     string.startsWith(new RegExp(r'^art'), 1); // false
   *     string.startsWith(new RegExp(r'art'), 1); // true
   */
  bool startsWith(Pattern pattern, [int index = 0]);

  /**
   * Returns the position of the first match of [pattern] in this string,
   * starting at [start], inclusive:
   *
   *     var string = 'Dartisans';
   *     string.indexOf('art'); // 1
   *     string.indexOf(new RegExp(r'[A-Z][a-z]')); // 0
   *
   * Returns -1 if no match is found:
   *
   *     string.indexOf(new RegExp(r'dart')); // -1
   *
   * [start] must not be negative or greater than [length].
   */
  int indexOf(Pattern pattern, [int start]);

  /**
   * Returns the position of the last match of [pattern] in this string,
   * searching backward starting at [start], inclusive:
   *
   *     var string = 'Dartisans';
   *     string.lastIndexOf('a'); // 6
   *     string.lastIndexOf(new RegExp(r'a(r|n)')); // 6
   *
   * Returns -1 if no match of [pattern] is found:
   *
   *     string.lastIndexOf(new RegExp(r'DART')); // -1
   *
   * [start] must not be negative or greater than [length].
   */
  int lastIndexOf(Pattern pattern, [int start]);

  /**
   * Returns true if this string is empty.
   */
  bool get isEmpty;

  /**
   * Returns true if this string is not empty.
   */
  bool get isNotEmpty;

  /**
   * Creates a new string by concatenating this string with [other].
   *
   *     'dart' + 'lang'; // 'dartlang'
   */
  String operator +(String other);

  /**
   * Returns the substring of this string that extends from [startIndex],
   * inclusive, to [endIndex], exclusive.
   *
   *     var string = 'dartlang';
   *     string.substring(1); // 'artlang'
   *     string.substring(1, 4); // 'art'
   */
  String substring(int startIndex, [int endIndex]);

  /**
   * Returns the string without any leading and trailing whitespace.
   *
   * If the string contains leading or trailing whitespace, a new string with no
   * leading and no trailing whitespace is returned:
   *
   *     '\tDart is fun\n'.trim(); // 'Dart is fun'
   *
   * Otherwise, the original string itself is returned:
   *
   *     var str1 = 'Dart';
   *     var str2 = str1.trim();
   *     identical(str1, str2); // true
   *
   * Whitespace is defined by the Unicode White_Space property (as defined in
   * version 6.2 or later) and the BOM character, 0xFEFF.
   *
   * Here is the list of trimmed characters (following version 6.2):
   *
   *     0009..000D ; White_Space # Cc <control-0009>..<control-000D>
   *     0020 ; White_Space # Zs SPACE
   *     0085 ; White_Space # Cc <control-0085>
   *     00A0 ; White_Space # Zs NO-BREAK SPACE
   *     1680 ; White_Space # Zs OGHAM SPACE MARK
   *     180E ; White_Space # Zs MONGOLIAN VOWEL SEPARATOR
   *     2000..200A ; White_Space # Zs EN QUAD..HAIR SPACE
   *     2028 ; White_Space # Zl LINE SEPARATOR
   *     2029 ; White_Space # Zp PARAGRAPH SEPARATOR
   *     202F ; White_Space # Zs NARROW NO-BREAK SPACE
   *     205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE
   *     3000 ; White_Space # Zs IDEOGRAPHIC SPACE
   *
   *     FEFF ; BOM ZERO WIDTH NO_BREAK SPACE
   */
  String trim();

  /**
   * Returns the string without any leading whitespace.
   *
   * As [trim], but only removes leading whitespace.
   */
  String trimLeft();

  /**
   * Returns the string without any trailing whitespace.
   *
   * As [trim], but only removes trailing whitespace.
   */
  String trimRight();

  /**
   * Creates a new string by concatenating this string with itself a number
   * of times.
   *
   * The result of `str * n` is equivalent to
   * `str + str + ...`(n times)`... + str`.
   *
   * Returns an empty string if [times] is zero or negative.
   */
  String operator *(int times);

  /**
   * Pads this string on the left if it is shorter than [width].
   *
   * Returns a new string that prepends [padding] onto this string
   * one time for each position the length is less than [width].
   *
   * If [width] is already smaller than or equal to `this.length`,
   * no padding is added. A negative `width` is treated as zero.
   *
   * If [padding] has length different from 1, the result will not
   * have length `width`. This may be useful for cases where the
   * padding is a longer string representing a single character, like
   * `"&nbsp;"` or `"\u{10002}"`.
   * In that case, the user should make sure that `this.length` is
   * the correct measure of the string's length.
   */
  String padLeft(int width, [String padding = ' ']);

  /**
   * Pads this string on the right if it is shorter than [width].
   *
   * Returns a new string that appends [padding] after this string
   * one time for each position the length is less than [width].
   *
   * If [width] is already smaller than or equal to `this.length`,
   * no padding is added. A negative `width` is treated as zero.
   *
   * If [padding] has length different from 1, the result will not
   * have length `width`. This may be useful for cases where the
   * padding is a longer string representing a single character, like
   * `"&nbsp;"` or `"\u{10002}"`.
   * In that case, the user should make sure that `this.length` is
   * the correct measure of the string's length.
   */
  String padRight(int width, [String padding = ' ']);

  /**
   * Returns true if this string contains a match of [other]:
   *
   *     var string = 'Dart strings';
   *     string.contains('D'); // true
   *     string.contains(new RegExp(r'[A-Z]')); // true
   *
   * If [startIndex] is provided, this method matches only at or after that
   * index:
   *
   *     string.contains('X', 1); // false
   *     string.contains(new RegExp(r'[A-Z]'), 1); // false
   *
   * [startIndex] must not be negative or greater than [length].
   */
  bool contains(Pattern other, [int startIndex = 0]);

  /**
   * Returns a new string in which the first occurrence of [from] in this
   * string is replaced with [to], starting from [startIndex]:
   *
   *     '0.0001'.replaceFirst(new RegExp(r'0'), ''); // '.0001'
   *     '0.0001'.replaceFirst(new RegExp(r'0'), '7', 1); // '0.7001'
   */
  String replaceFirst(Pattern from, String to, [int startIndex = 0]);

  /**
   * Replaces the first occurrence of [from] in this string.
   *
   * Returns a new string, which is this string
   * except that the first match of [from], starting from [startIndex],
   * is replaced by the result of calling [replace] with the match object.
   *
   * If the value returned by calling `replace` is not a [String], it
   * is converted to a `String` using its `toString` method, which must
   * then return a string.
   */
  String replaceFirstMapped(Pattern from, String replace(Match match),
                            [int startIndex = 0]);

  /**
   * Replaces all substrings that match [from] with [replace].
   *
   * Returns a new string in which the non-overlapping substrings matching
   * [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
   * by the literal string [replace].
   *
   *     'resume'.replaceAll(new RegExp(r'e'), 'é'); // 'résumé'
   *
   * Notice that the [replace] string is not interpreted. If the replacement
   * depends on the match (for example on a [RegExp]'s capture groups), use
   * the [replaceAllMapped] method instead.
   */
  String replaceAll(Pattern from, String replace);

  /**
   * Replaces all substrings that match [from] by a string computed from the
   * match.
   *
   * Returns a new string in which the non-overlapping substrings that match
   * [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
   * by the result of calling [replace] on the corresponding [Match] object.
   *
   * This can be used to replace matches with new content that depends on the
   * match, unlike [replaceAll] where the replacement string is always the same.
   *
   * The [replace] function is called with the [Match] generated
   * by the pattern, and its result is used as replacement.
   *
   * The function defined below converts each word in a string to simplified
   * 'pig latin' using [replaceAllMapped]:
   *
   *     pigLatin(String words) => words.replaceAllMapped(
   *         new RegExp(r'\b(\w*?)([aeiou]\w*)', caseSensitive: false),
   *         (Match m) => "${m[2]}${m[1]}${m[1].isEmpty ? 'way' : 'ay'}");
   *
   *     pigLatin('I have a secret now!'); // 'Iway avehay away ecretsay ownay!'
   */
  String replaceAllMapped(Pattern from, String replace(Match match));

  /**
   * Replaces the substring from [start] to [end] with [replacement].
   *
   * Returns a new string equivalent to:
   *
   *     this.substring(0, start) + replacement + this.substring(end)
   *
   * The [start] and [end] indices must specify a valid range of this string.
   * That is `0 <= start <= end <= this.length`.
   * If [end] is `null`, it defaults to [length].
   */
  String replaceRange(int start, int end, String replacement);

  /**
   * Splits the string at matches of [pattern] and returns a list of substrings.
   *
   * Finds all the matches of `pattern` in this string,
   * and returns the list of the substrings between the matches.
   *
   *     var string = "Hello world!";
   *     string.split(" "); // ['Hello', 'world!'];
   *
   * Empty matches at the beginning and end of the strings are ignored,
   * and so are empty matches right after another match.
   *
   *     var string = "abba";
   *     string.split(new RegExp(r"b*")); // ['a', 'a']
   *     // not ['', 'a', 'a', '']
   *
   * If this string is empty, the result is an empty list if `pattern` matches
   * the empty string, and it is `[""]` if the pattern doesn't match.
   *
   *     var string = '';
   *     string.split(''); // []
   *     string.split("a"); // ['']
   *
   * Splitting with an empty pattern splits the string into single-code unit
   * strings.
   *
   *     var string = 'Pub';
   *     string.split(''); // ['P', 'u', 'b']
   *
   *     string.codeUnits.map((unit) {
   *       return new String.fromCharCode(unit);
   *     }).toList(); // ['P', 'u', 'b']
   *
   * Splitting happens at UTF-16 code unit boundaries,
   * and not at rune boundaries:
   *
   *     // String made up of two code units, but one rune.
   *     string = '\u{1D11E}';
   *     string.split('').length; // 2 surrogate values
   *
   * To get a list of strings containing the individual runes of a string,
   * you should not use split. You can instead map each rune to a string
   * as follows:
   *
   *     string.runes.map((rune) => new String.fromCharCode(rune)).toList();
   */
  List<String> split(Pattern pattern);

  /**
   * Splits the string, converts its parts, and combines them into a new
   * string.
   *
   * [pattern] is used to split the string into parts and separating matches.
   *
   * Each match is converted to a string by calling [onMatch]. If [onMatch]
   * is omitted, the matched string is used.
   *
   * Each non-matched part is converted by a call to [onNonMatch]. If
   * [onNonMatch] is omitted, the non-matching part is used.
   *
   * Then all the converted parts are combined into the resulting string.
   *
   *     'Eats shoots leaves'.splitMapJoin((new RegExp(r'shoots')),
   *         onMatch: (m) => '${m.group(0)}',
   *         onNonMatch: (n) => '*'); // *shoots*
   */
  String splitMapJoin(Pattern pattern,
                      {String onMatch(Match match),
                       String onNonMatch(String nonMatch)});

  /**
   * Returns an unmodifiable list of the UTF-16 code units of this string.
   */
  List<int> get codeUnits;

  /**
   * Returns an [Iterable] of Unicode code-points of this string.
   *
   * If the string contains surrogate pairs, they are combined and returned
   * as one integer by this iterator. Unmatched surrogate halves are treated
   * like valid 16-bit code-units.
   */
  Runes get runes;

  /**
   * Converts all characters in this string to lower case.
   * If the string is already in all lower case, this method returns `this`.
   *
   *     'ALPHABET'.toLowerCase(); // 'alphabet'
   *     'abc'.toLowerCase(); // 'abc'
   *
   * This function uses the language independent Unicode mapping and thus only
   * works in some languages.
   */
  // TODO(floitsch): document better. (See EcmaScript for description).
  String toLowerCase();

  /**
   * Converts all characters in this string to upper case.
   * If the string is already in all upper case, this method returns `this`.
   *
   *     'alphabet'.toUpperCase(); // 'ALPHABET'
   *     'ABC'.toUpperCase(); // 'ABC'
   *
   * This function uses the language independent Unicode mapping and thus only
   * works in some languages.
   */
  // TODO(floitsch): document better. (See EcmaScript for description).
  String toUpperCase();
}
601 | |
/**
 * An [Iterable] view of a [String] as runes (integer Unicode code points).
 */
class Runes extends Iterable<int> {
  /** The string whose runes are iterated. */
  final String string;

  Runes(this.string);

  /** A fresh [RuneIterator] over [string]. */
  RuneIterator get iterator => new RuneIterator(string);

  /**
   * The last rune of [string].
   *
   * Reads the final code unit directly. When that unit is a trail surrogate
   * preceded by a lead surrogate, the two are combined into one code point;
   * otherwise the final code unit is returned as is.
   *
   * Throws a [StateError] if [string] is empty.
   */
  int get last {
    final int unitCount = string.length;
    if (unitCount == 0) {
      throw new StateError('No elements.');
    }
    final int finalUnit = string.codeUnitAt(unitCount - 1);
    // Only a trail surrogate with something before it can be half of a pair.
    if (unitCount < 2 || !_isTrailSurrogate(finalUnit)) {
      return finalUnit;
    }
    final int precedingUnit = string.codeUnitAt(unitCount - 2);
    return _isLeadSurrogate(precedingUnit)
        ? _combineSurrogatePair(precedingUnit, finalUnit)
        : finalUnit;
  }
}
627 | |
// Whether [code] (a 16-bit unsigned integer) is a UTF-16 lead surrogate,
// i.e. its top six bits match those of 0xD800.
bool _isLeadSurrogate(int code) {
  return 0xD800 == (code & 0xFC00);
}
630 | |
// Whether [code] (a 16-bit unsigned integer) is a UTF-16 trail surrogate,
// i.e. its top six bits match those of 0xDC00.
bool _isTrailSurrogate(int code) {
  return 0xDC00 == (code & 0xFC00);
}
633 | |
// Combines a lead surrogate [start] and a trail surrogate [end] into a single
// code point: the low ten bits of each are joined (lead bits on top) and
// offset by 0x10000.
int _combineSurrogatePair(int start, int end) {
  int leadBits = start & 0x3FF;
  int trailBits = end & 0x3FF;
  return 0x10000 + (leadBits << 10) + trailBits;
}
638 | |
/**
 * A [BidirectionalIterator] that reads runes (integer Unicode code points)
 * out of a Dart string.
 */
class RuneIterator implements BidirectionalIterator<int> {
  /** The string being iterated. */
  final String string;

  /** Code unit index where the current rune starts. */
  int _position;

  /** Code unit index just past the current rune. */
  int _nextPosition;

  /**
   * The current rune.
   *
   * Is null, with `_position == _nextPosition`, whenever there is no
   * current rune (after construction, after [reset], or after moving past
   * either end of the string).
   */
  int _currentCodePoint;

  /** Creates an iterator positioned at the start of [string]. */
  RuneIterator(this.string)
      : _position = 0,
        _nextPosition = 0;

  /**
   * Creates an iterator positioned before the [index]th code unit of
   * [string].
   *
   * The new iterator has no [current] value. A following [moveNext] makes
   * the rune starting at [index] current, and a following [movePrevious]
   * makes the rune ending just before [index] current.
   *
   * The [index] must lie in `0..string.length` and must not be in the middle
   * of a surrogate pair.
   */
  RuneIterator.at(this.string, int index)
      : _position = index,
        _nextPosition = index {
    RangeError.checkValueInInterval(index, 0, string.length);
    _checkSplitSurrogate(index);
  }

  /**
   * Throws an [ArgumentError] if [index] falls between the lead and trail
   * halves of a surrogate pair.
   */
  void _checkSplitSurrogate(int index) {
    // Positions at either end of the string cannot split a pair.
    if (index <= 0 || index >= string.length) return;
    if (!_isLeadSurrogate(string.codeUnitAt(index - 1))) return;
    if (!_isTrailSurrogate(string.codeUnitAt(index))) return;
    throw new ArgumentError('Index inside surrogate pair: $index');
  }

  /**
   * The code unit index in [string] where the current rune starts.
   *
   * Is null when there is no current rune.
   */
  int get rawIndex => (_position == _nextPosition) ? null : _position;

  /**
   * Moves the iterator to the rune starting at code unit index [rawIndex],
   * making that rune [current].
   *
   * It is an error if [rawIndex] is negative, is greater than or equal to
   * `string.length`, or lies in the middle of a surrogate pair.
   *
   * To position the iterator at the end of the string, with no current
   * rune, call [reset] with `string.length` instead.
   */
  void set rawIndex(int rawIndex) {
    RangeError.checkValidIndex(rawIndex, string, "rawIndex");
    reset(rawIndex);
    moveNext();
  }

  /**
   * Repositions the iterator at code unit index [rawIndex] without a
   * current rune.
   *
   * Afterwards [current] is null. Call [moveNext] to make the rune starting
   * at the position current, or [movePrevious] to make the rune ending just
   * before it current.
   *
   * It is an error if [rawIndex] is negative, is greater than
   * `string.length`, or lies in the middle of a surrogate pair.
   */
  void reset([int rawIndex = 0]) {
    RangeError.checkValueInInterval(rawIndex, 0, string.length, "rawIndex");
    _checkSplitSurrogate(rawIndex);
    _position = rawIndex;
    _nextPosition = rawIndex;
    _currentCodePoint = null;
  }

  /**
   * The rune (integer Unicode code point) at the current position, or null
   * if there is none.
   */
  int get current => _currentCodePoint;

  /**
   * The number of code units that make up the current rune.
   *
   * Is zero when there is no current rune ([current] is null).
   */
  int get currentSize => _nextPosition - _position;

  /**
   * The current rune as a string.
   *
   * For a rune made of a surrogate pair this is a string of length 2
   * holding both code units.
   *
   * Is null when [current] is null.
   */
  String get currentAsString {
    final int size = _nextPosition - _position;
    if (size == 0) return null;
    return size == 1
        ? string[_position]
        : string.substring(_position, _nextPosition);
  }

  /**
   * Advances to the next rune.
   *
   * Returns false, leaving [current] null, when the iterator is already at
   * the end of the string.
   */
  bool moveNext() {
    _position = _nextPosition;
    final int limit = string.length;
    if (_position == limit) {
      _currentCodePoint = null;
      return false;
    }
    int rune = string.codeUnitAt(_position);
    int end = _position + 1;
    // A lead surrogate directly followed by a trail surrogate is one rune
    // spanning two code units; anything else is taken as a single unit.
    if (end < limit && _isLeadSurrogate(rune)) {
      final int trail = string.codeUnitAt(end);
      if (_isTrailSurrogate(trail)) {
        rune = _combineSurrogatePair(rune, trail);
        end++;
      }
    }
    _nextPosition = end;
    _currentCodePoint = rune;
    return true;
  }

  /**
   * Steps back to the previous rune.
   *
   * Returns false, leaving [current] null, when the iterator is already at
   * the start of the string.
   */
  bool movePrevious() {
    _nextPosition = _position;
    if (_position == 0) {
      _currentCodePoint = null;
      return false;
    }
    int begin = _position - 1;
    int rune = string.codeUnitAt(begin);
    // A trail surrogate directly preceded by a lead surrogate is one rune
    // spanning two code units; anything else is taken as a single unit.
    if (begin > 0 && _isTrailSurrogate(rune)) {
      final int lead = string.codeUnitAt(begin - 1);
      if (_isLeadSurrogate(lead)) {
        rune = _combineSurrogatePair(lead, rune);
        begin--;
      }
    }
    _position = begin;
    _currentCodePoint = rune;
    return true;
  }
}
OLD | NEW |