OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 /** | 5 /** |
6 * [_StringBase] contains common methods used by concrete String | 6 * [_StringBase] contains common methods used by concrete String |
7 * implementations, e.g., _OneByteString. | 7 * implementations, e.g., _OneByteString. |
8 */ | 8 */ |
9 class _StringBase { | 9 class _StringBase { |
10 | 10 |
11 factory _StringBase._uninstantiable() { | 11 factory _StringBase._uninstantiable() { |
12 throw new UnsupportedError( | 12 throw new UnsupportedError( |
13 "_StringBase can't be instaniated"); | 13 "_StringBase can't be instaniated"); |
14 } | 14 } |
15 | 15 |
16 int get hashCode native "String_getHashCode"; | 16 int get hashCode native "String_getHashCode"; |
17 | 17 |
18 /** | 18 /** |
19 * Create the most efficient string representation for the specified UTF-16 | 19 * Create the most efficient string representation for specified |
20 * [codeUnits]. | 20 * [codePoints]. |
21 */ | 21 */ |
22 static String createFromUtf16(List<int> codeUnits) { | 22 static String createFromCharCodes(List<int> charCodes) { |
23 _ObjectArray objectArray; | 23 _ObjectArray objectArray; |
24 if (codeUnits is _ObjectArray) { | 24 if (charCodes is _ObjectArray) { |
25 objectArray = codeUnits; | 25 objectArray = charCodes; |
26 } else { | 26 } else { |
27 int len = codeUnits.length; | 27 int len = charCodes.length; |
28 objectArray = new _ObjectArray(len); | 28 objectArray = new _ObjectArray(len); |
29 for (int i = 0; i < len; i++) { | 29 for (int i = 0; i < len; i++) { |
30 objectArray[i] = codeUnits[i]; | 30 objectArray[i] = charCodes[i]; |
31 } | 31 } |
32 } | 32 } |
33 return _createFromUtf16(objectArray); | 33 return _createFromCodePoints(objectArray); |
34 } | 34 } |
35 | 35 |
36 static String _createFromUtf16(List<int> codeUnits) | 36 static String _createFromCodePoints(List<int> codePoints) |
37 native "StringBase_createFromUtf16"; | 37 native "StringBase_createFromCodePoints"; |
38 | 38 |
39 String operator [](int index) native "String_charAt"; | 39 String operator [](int index) native "String_charAt"; |
40 | 40 |
41 int codeUnitAt(int index) native "String_codeUnitAt"; | 41 int charCodeAt(int index) native "String_charCodeAt"; |
42 | 42 |
43 int get length native "String_getLength"; | 43 int get length native "String_getLength"; |
44 | 44 |
45 bool get isEmpty { | 45 bool get isEmpty { |
46 return this.length == 0; | 46 return this.length == 0; |
47 } | 47 } |
48 | 48 |
49 String concat(String other) native "String_concat"; | 49 String concat(String other) native "String_concat"; |
50 | 50 |
51 String toString() { | 51 String toString() { |
(...skipping 10 matching lines...) |
62 return false; | 62 return false; |
63 } | 63 } |
64 return this.compareTo(other) == 0; | 64 return this.compareTo(other) == 0; |
65 } | 65 } |
66 | 66 |
67 int compareTo(String other) { | 67 int compareTo(String other) { |
68 int thisLength = this.length; | 68 int thisLength = this.length; |
69 int otherLength = other.length; | 69 int otherLength = other.length; |
70 int len = (thisLength < otherLength) ? thisLength : otherLength; | 70 int len = (thisLength < otherLength) ? thisLength : otherLength; |
71 for (int i = 0; i < len; i++) { | 71 for (int i = 0; i < len; i++) { |
72 int thisCodeUnit = this.codeUnitAt(i); | 72 int thisCodePoint = this.charCodeAt(i); |
73 int otherCodeUnit = other.codeUnitAt(i); | 73 int otherCodePoint = other.charCodeAt(i); |
74 if (thisCodeUnit < otherCodeUnit) { | 74 if (thisCodePoint < otherCodePoint) { |
75 return -1; | 75 return -1; |
76 } | 76 } |
77 if (thisCodeUnit > otherCodeUnit) { | 77 if (thisCodePoint > otherCodePoint) { |
78 return 1; | 78 return 1; |
79 } | 79 } |
80 } | 80 } |
81 if (thisLength < otherLength) return -1; | 81 if (thisLength < otherLength) return -1; |
82 if (thisLength > otherLength) return 1; | 82 if (thisLength > otherLength) return 1; |
83 return 0; | 83 return 0; |
84 } | 84 } |
85 | 85 |
86 bool _substringMatches(int start, String other) { | 86 bool _substringMatches(int start, String other) { |
87 if (other.isEmpty) return true; | 87 if (other.isEmpty) return true; |
88 if ((start < 0) || (start >= this.length)) { | 88 if ((start < 0) || (start >= this.length)) { |
89 return false; | 89 return false; |
90 } | 90 } |
91 final int len = other.length; | 91 final int len = other.length; |
92 if ((start + len) > this.length) { | 92 if ((start + len) > this.length) { |
93 return false; | 93 return false; |
94 } | 94 } |
95 for (int i = 0; i < len; i++) { | 95 for (int i = 0; i < len; i++) { |
96 if (this.codeUnitAt(i + start) != other.codeUnitAt(i)) { | 96 if (this.charCodeAt(i + start) != other.charCodeAt(i)) { |
97 return false; | 97 return false; |
98 } | 98 } |
99 } | 99 } |
100 return true; | 100 return true; |
101 } | 101 } |
102 | 102 |
103 bool endsWith(String other) { | 103 bool endsWith(String other) { |
104 return _substringMatches(this.length - other.length, other); | 104 return _substringMatches(this.length - other.length, other); |
105 } | 105 } |
106 | 106 |
(...skipping 48 matching lines...) |
155 return _substringUnchecked(startIndex, endIndex); | 155 return _substringUnchecked(startIndex, endIndex); |
156 } | 156 } |
157 | 157 |
158 String _substringUnchecked(int startIndex, int endIndex) | 158 String _substringUnchecked(int startIndex, int endIndex) |
159 native "StringBase_substringUnchecked"; | 159 native "StringBase_substringUnchecked"; |
160 | 160 |
161 String trim() { | 161 String trim() { |
162 final int len = this.length; | 162 final int len = this.length; |
163 int first = 0; | 163 int first = 0; |
164 for (; first < len; first++) { | 164 for (; first < len; first++) { |
165 // There are no whitespace characters that are outside the BMP so we | 165 if (!_isWhitespace(this.charCodeAt(first))) { |
166 // can use code units here for efficiency. | |
167 if (!_isWhitespace(this.codeUnitAt(first))) { | |
168 break; | 166 break; |
169 } | 167 } |
170 } | 168 } |
171 if (len == first) { | 169 if (len == first) { |
172 // String contains only whitespaces. | 170 // String contains only whitespaces. |
173 return ""; | 171 return ""; |
174 } | 172 } |
175 int last = len - 1; | 173 int last = len - 1; |
176 for (; last >= first; last--) { | 174 for (; last >= first; last--) { |
177 if (!_isWhitespace(this.codeUnitAt(last))) { | 175 if (!_isWhitespace(this.charCodeAt(last))) { |
178 break; | 176 break; |
179 } | 177 } |
180 } | 178 } |
181 if ((first == 0) && (last == (len - 1))) { | 179 if ((first == 0) && (last == (len - 1))) { |
182 // Returns this string if it does not have leading or trailing | 180 // Returns this string if it does not have leading or trailing |
183 // whitespaces. | 181 // whitespaces. |
184 return this; | 182 return this; |
185 } else { | 183 } else { |
186 return _substringUnchecked(first, last + 1); | 184 return _substringUnchecked(first, last + 1); |
187 } | 185 } |
(...skipping 100 matching lines...) |
288 if (startIndex == endIndex && endIndex == previousIndex) { | 286 if (startIndex == endIndex && endIndex == previousIndex) { |
289 ++startIndex; // empty match, advance and restart | 287 ++startIndex; // empty match, advance and restart |
290 continue; | 288 continue; |
291 } | 289 } |
292 result.add(this.substring(previousIndex, match.start)); | 290 result.add(this.substring(previousIndex, match.start)); |
293 startIndex = previousIndex = endIndex; | 291 startIndex = previousIndex = endIndex; |
294 } | 292 } |
295 return result; | 293 return result; |
296 } | 294 } |
297 | 295 |
298 // TODO(erikcorry): Fix this to use the new code point iterator when it is | |
299 // available. | |
300 List<String> splitChars() { | 296 List<String> splitChars() { |
301 int len = this.length; | 297 int len = this.length; |
302 final result = new List<String>(len); | 298 final result = new List<String>(len); |
303 bool supplementaryCharacterSeen = false; | 299 for (int i = 0; i < len; i++) { |
304 int i, j; | 300 result[i] = this[i]; |
305 for (i = j = 0; i < len; i++, j++) { | |
306 int c = charCodeAt(i); | |
307 // Check for non-basic plane character encoded as a UTF-16 surrogate pair. | |
308 if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) { | |
309 i++; | |
310 supplementaryCharacterSeen = true; | |
311 } | |
312 result[j] = new String.fromCharCodes([c]); | |
313 } | 301 } |
314 if (!supplementaryCharacterSeen) return result; | 302 return result; |
315 // If we saw some non-basic plane characters, then we have to return a | |
316 // slightly smaller array than expected (we can't trim the original one | |
317 // because it is non-extendable). This rarely happens so this is preferable | |
318 // to having a separate pass over the string to count the code points. | |
319 return result.getRange(0, j); | |
320 } | 303 } |
321 | 304 |
322 List<int> get codeUnits { | 305 List<int> get charCodes { |
323 int len = this.length; | 306 int len = this.length; |
324 final result = new List<int>(len); | 307 final result = new List<int>(len); |
325 for (int i = 0; i < len; i++) { | 308 for (int i = 0; i < len; i++) { |
326 result[i] = this.codeUnitAt(i); | 309 result[i] = this.charCodeAt(i); |
327 } | 310 } |
328 return result; | 311 return result; |
329 } | 312 } |
330 | 313 |
331 String toUpperCase() native "String_toUpperCase"; | 314 String toUpperCase() native "String_toUpperCase"; |
332 | 315 |
333 String toLowerCase() native "String_toLowerCase"; | 316 String toLowerCase() native "String_toLowerCase"; |
334 | 317 |
335 // Implementations of Strings methods follow below. | 318 // Implementations of Strings methods follow below. |
336 static String join(List<String> strings, String separator) { | 319 static String join(List<String> strings, String separator) { |
(...skipping 33 matching lines...) |
370 native "Strings_concatAll"; | 353 native "Strings_concatAll"; |
371 } | 354 } |
372 | 355 |
373 | 356 |
374 class _OneByteString extends _StringBase implements String { | 357 class _OneByteString extends _StringBase implements String { |
375 factory _OneByteString._uninstantiable() { | 358 factory _OneByteString._uninstantiable() { |
376 throw new UnsupportedError( | 359 throw new UnsupportedError( |
377 "_OneByteString can only be allocated by the VM"); | 360 "_OneByteString can only be allocated by the VM"); |
378 } | 361 } |
379 | 362 |
| 363 // Checks for one-byte whitespaces only. |
| 364 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
| 365 // whitespaces for one byte strings. |
380 bool _isWhitespace(int codePoint) { | 366 bool _isWhitespace(int codePoint) { |
381 return | 367 return |
382 (codePoint == 32) || // Space. | 368 (codePoint == 32) || // Space. |
383 (codePoint == 0xa0) || // No-break space. | |
384 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 369 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
385 } | 370 } |
386 | 371 |
387 int charCodeAt(int index) => codeUnitAt(index); | |
388 | |
389 List<int> get charCodes => codeUnits; | |
390 } | 372 } |
391 | 373 |
392 | 374 |
393 class _TwoByteStringBase extends _StringBase { | 375 class _TwoByteString extends _StringBase implements String { |
394 factory _TwoByteStringBase._uninstantiable() { | 376 factory _TwoByteString._uninstantiable() { |
395 throw new UnsupportedError( | 377 throw new UnsupportedError( |
396 "_TwoByteStringBase can't be instaniated"); | 378 "_TwoByteString can only be allocated by the VM"); |
397 } | 379 } |
398 | 380 |
399 // Works for both code points and code units since all spaces are in the BMP. | 381 // Checks for one-byte whitespaces only. |
| 382 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
| 383 // whitespaces. Add checking for multi-byte whitespace codepoints. |
400 bool _isWhitespace(int codePoint) { | 384 bool _isWhitespace(int codePoint) { |
401 return | 385 return |
402 (codePoint == 32) || // Space. | 386 (codePoint == 32) || // Space. |
403 (codePoint == 0xa0) || // No-break space. | 387 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
404 ((9 <= codePoint) && (codePoint <= 13)) || // CR, LF, TAB, etc. | |
405 (codePoint >= 0x1680 && // Optimization. | |
406 (codePoint == 0x1680 || // Ogham space mark. | |
407 codePoint == 0x180e || // Mongolian vowel separator. | |
408 (codePoint >= 0x2000 && codePoint <= 0x200a) || // Wide/narrow spaces. | |
409 codePoint == 0x2028 || // Line separator. | |
410 codePoint == 0x2029 || // Paragraph separator. | |
411 codePoint == 0x202f || // Narrow no-break space. | |
412 codePoint == 0x205f || // Medium mathematical space. | |
413 codePoint == 0x3000 || // Ideographic space. | |
414 codePoint == 0xfeff)); // BOM code. | |
415 } | |
416 | |
417 int charCodeAt(int index) { | |
418 const int LEAD_SURROGATE_BASE = 0xd800; | |
419 const int LEAD_SURROGATE_END = 0xdbff; | |
420 const int TRAIL_SURROGATE_BASE = 0xdc00; | |
421 const int TRAIL_SURROGATE_END = 0xdfff; | |
422 const int MASK = 0x3ff; | |
423 int code = codeUnitAt(index); | |
424 if (code < LEAD_SURROGATE_BASE || code > LEAD_SURROGATE_END) return code; | |
425 if (index + 1 >= length) return code; | |
426 int trail = codeUnitAt(index + 1); | |
427 if (trail < TRAIL_SURROGATE_BASE || trail > TRAIL_SURROGATE_END) { | |
428 return code; | |
429 } | |
430 return String.SUPPLEMENTARY_CODE_POINT_BASE + | |
431 ((code & MASK) << 10) + (trail & MASK); | |
432 } | |
433 | |
434 // TODO(erikcorry): Fix this to use the new code point iterator when it is | |
435 // available. | |
436 List<int> get charCodes { | |
437 int len = this.length; | |
438 final result = new List<int>(len); | |
439 bool supplementaryCharacterSeen = false; | |
440 int i, j; | |
441 for (i = j = 0; i < len; i++, j++) { | |
442 int c = this.charCodeAt(i); | |
443 // Check for supplementary plane character encoded as a UTF-16 surrogate | |
444 // pair. | |
445 if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) { | |
446 i++; | |
447 supplementaryCharacterSeen = true; | |
448 } | |
449 result[j] = c; | |
450 } | |
451 if (!supplementaryCharacterSeen) return result; | |
452 // If we saw some non-basic plane characters, then we have to return a | |
453 // slightly smaller array than expected (we can't trim the original one | |
454 // because it is non-extendable). This rarely happens so this is preferable | |
455 // to having a separate pass over the string to count the code points. | |
456 return result.getRange(0, j); | |
457 } | 388 } |
458 } | 389 } |
459 | 390 |
460 | 391 |
461 class _TwoByteString extends _TwoByteStringBase implements String { | 392 class _FourByteString extends _StringBase implements String { |
462 factory _TwoByteString._uninstantiable() { | 393 factory _FourByteString._uninstantiable() { |
463 throw new UnsupportedError( | 394 throw new UnsupportedError( |
464 "_TwoByteString can only be allocated by the VM"); | 395 "_FourByteString can only be allocated by the VM"); |
| 396 } |
| 397 |
| 398 // Checks for one-byte whitespaces only. |
| 399 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
| 400 // whitespaces. Add checking for multi-byte whitespace codepoints. |
| 401 bool _isWhitespace(int codePoint) { |
| 402 return |
| 403 (codePoint == 32) || // Space. |
| 404 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
465 } | 405 } |
466 } | 406 } |
467 | 407 |
468 | 408 |
469 class _ExternalOneByteString extends _StringBase implements String { | 409 class _ExternalOneByteString extends _StringBase implements String { |
470 factory _ExternalOneByteString._uninstantiable() { | 410 factory _ExternalOneByteString._uninstantiable() { |
471 throw new UnsupportedError( | 411 throw new UnsupportedError( |
472 "_ExternalOneByteString can only be allocated by the VM"); | 412 "_ExternalOneByteString can only be allocated by the VM"); |
473 } | 413 } |
474 | 414 |
| 415 // Checks for one-byte whitespaces only. |
| 416 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
| 417 // whitespaces for one byte strings. |
475 bool _isWhitespace(int codePoint) { | 418 bool _isWhitespace(int codePoint) { |
476 return | 419 return |
477 (codePoint == 32) || // Space. | 420 (codePoint == 32) || // Space. |
478 (codePoint == 0xa0) || // No-break space. | |
479 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 421 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
480 } | 422 } |
481 | |
482 int charCodeAt(int index) => codeUnitAt(index); | |
483 | |
484 List<int> get charCodes => codeUnits; | |
485 } | 423 } |
486 | 424 |
487 | 425 |
488 class _ExternalTwoByteString extends _TwoByteStringBase implements String { | 426 class _ExternalTwoByteString extends _StringBase implements String { |
489 factory _ExternalTwoByteString._uninstantiable() { | 427 factory _ExternalTwoByteString._uninstantiable() { |
490 throw new UnsupportedError( | 428 throw new UnsupportedError( |
491 "_ExternalTwoByteString can only be allocated by the VM"); | 429 "_ExternalTwoByteString can only be allocated by the VM"); |
492 } | 430 } |
| 431 |
| 432 // Checks for one-byte whitespaces only. |
| 433 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
| 434 // whitespaces. Add checking for multi-byte whitespace codepoints. |
| 435 bool _isWhitespace(int codePoint) { |
| 436 return |
| 437 (codePoint == 32) || // Space. |
| 438 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
| 439 } |
493 } | 440 } |
494 | 441 |
495 | 442 |
| 443 class _ExternalFourByteString extends _StringBase implements String { |
| 444 factory _ExternalFourByteString._uninstantiable() { |
| 445 throw new UnsupportedError( |
| 446 "ExternalFourByteString can only be allocated by the VM"); |
| 447 } |
| 448 |
| 449 // Checks for one-byte whitespaces only. |
| 450 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
| 451 // whitespaces. Add checking for multi-byte whitespace codepoints. |
| 452 bool _isWhitespace(int codePoint) { |
| 453 return |
| 454 (codePoint == 32) || // Space. |
| 455 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
| 456 } |
| 457 } |
| 458 |
| 459 |
496 class _StringMatch implements Match { | 460 class _StringMatch implements Match { |
497 const _StringMatch(int this.start, | 461 const _StringMatch(int this.start, |
498 String this.str, | 462 String this.str, |
499 String this.pattern); | 463 String this.pattern); |
500 | 464 |
501 int get end => start + pattern.length; | 465 int get end => start + pattern.length; |
502 String operator[](int g) => group(g); | 466 String operator[](int g) => group(g); |
503 int get groupCount => 0; | 467 int get groupCount => 0; |
504 | 468 |
505 String group(int group) { | 469 String group(int group) { |
506 if (group != 0) { | 470 if (group != 0) { |
507 throw new RangeError.value(group); | 471 throw new RangeError.value(group); |
508 } | 472 } |
509 return pattern; | 473 return pattern; |
510 } | 474 } |
511 | 475 |
512 List<String> groups(List<int> groups) { | 476 List<String> groups(List<int> groups) { |
513 List<String> result = new List<String>(); | 477 List<String> result = new List<String>(); |
514 for (int g in groups) { | 478 for (int g in groups) { |
515 result.add(group(g)); | 479 result.add(group(g)); |
516 } | 480 } |
517 return result; | 481 return result; |
518 } | 482 } |
519 | 483 |
520 final int start; | 484 final int start; |
521 final String str; | 485 final String str; |
522 final String pattern; | 486 final String pattern; |
523 } | 487 } |
OLD | NEW |