OLD | NEW |
---|---|
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 /** | 5 /** |
6 * [_StringBase] contains common methods used by concrete String | 6 * [_StringBase] contains common methods used by concrete String |
7 * implementations, e.g., _OneByteString. | 7 * implementations, e.g., _OneByteString. |
8 */ | 8 */ |
9 class _StringBase { | 9 class _StringBase { |
10 | 10 |
11 factory _StringBase._uninstantiable() { | 11 factory _StringBase._uninstantiable() { |
12 throw new UnsupportedError( | 12 throw new UnsupportedError( |
13 "_StringBase can't be instaniated"); | 13 "_StringBase can't be instaniated"); |
14 } | 14 } |
15 | 15 |
16 int get hashCode native "String_getHashCode"; | 16 int get hashCode native "String_getHashCode"; |
17 | 17 |
18 /** | 18 static String createFromUtf16(List<int> codeUnits) { |
siva
2012/11/16 22:32:04
The comment
"Create the .... for specified UTF-16
erikcorry
2012/11/19 12:40:41
Done.
| |
19 * Create the most efficient string representation for specified | |
20 * [codePoints]. | |
21 */ | |
22 static String createFromCharCodes(List<int> charCodes) { | |
23 _ObjectArray objectArray; | 19 _ObjectArray objectArray; |
24 if (charCodes is _ObjectArray) { | 20 if (codeUnits is _ObjectArray) { |
25 objectArray = charCodes; | 21 objectArray = codeUnits; |
26 } else { | 22 } else { |
27 int len = charCodes.length; | 23 int len = codeUnits.length; |
28 objectArray = new _ObjectArray(len); | 24 objectArray = new _ObjectArray(len); |
29 for (int i = 0; i < len; i++) { | 25 for (int i = 0; i < len; i++) { |
30 objectArray[i] = charCodes[i]; | 26 objectArray[i] = codeUnits[i]; |
31 } | 27 } |
32 } | 28 } |
33 return _createFromCodePoints(objectArray); | 29 return _createFromUtf16(objectArray); |
34 } | 30 } |
35 | 31 |
36 static String _createFromCodePoints(List<int> codePoints) | 32 static String _createFromUtf16(List<int> codeUnits) |
37 native "StringBase_createFromCodePoints"; | 33 native "StringBase_createFromUtf16"; |
38 | 34 |
39 String operator [](int index) native "String_charAt"; | 35 String operator [](int index) native "String_charAt"; |
40 | 36 |
41 int charCodeAt(int index) native "String_charCodeAt"; | 37 int codeUnitAt(int index) native "String_codeUnitAt"; |
42 | 38 |
43 int get length native "String_getLength"; | 39 int get length native "String_getLength"; |
44 | 40 |
45 bool get isEmpty { | 41 bool get isEmpty { |
46 return this.length == 0; | 42 return this.length == 0; |
47 } | 43 } |
48 | 44 |
49 String concat(String other) native "String_concat"; | 45 String concat(String other) native "String_concat"; |
50 | 46 |
51 String toString() { | 47 String toString() { |
(...skipping 10 matching lines...) | |
62 return false; | 58 return false; |
63 } | 59 } |
64 return this.compareTo(other) == 0; | 60 return this.compareTo(other) == 0; |
65 } | 61 } |
66 | 62 |
67 int compareTo(String other) { | 63 int compareTo(String other) { |
68 int thisLength = this.length; | 64 int thisLength = this.length; |
69 int otherLength = other.length; | 65 int otherLength = other.length; |
70 int len = (thisLength < otherLength) ? thisLength : otherLength; | 66 int len = (thisLength < otherLength) ? thisLength : otherLength; |
71 for (int i = 0; i < len; i++) { | 67 for (int i = 0; i < len; i++) { |
72 int thisCodePoint = this.charCodeAt(i); | 68 int thisCodeUnit = this.codeUnitAt(i); |
73 int otherCodePoint = other.charCodeAt(i); | 69 int otherCodeUnit = other.codeUnitAt(i); |
74 if (thisCodePoint < otherCodePoint) { | 70 if (thisCodeUnit < otherCodeUnit) { |
75 return -1; | 71 return -1; |
76 } | 72 } |
77 if (thisCodePoint > otherCodePoint) { | 73 if (thisCodeUnit > otherCodeUnit) { |
78 return 1; | 74 return 1; |
79 } | 75 } |
80 } | 76 } |
81 if (thisLength < otherLength) return -1; | 77 if (thisLength < otherLength) return -1; |
82 if (thisLength > otherLength) return 1; | 78 if (thisLength > otherLength) return 1; |
83 return 0; | 79 return 0; |
84 } | 80 } |
85 | 81 |
86 bool _substringMatches(int start, String other) { | 82 bool _substringMatches(int start, String other) { |
87 if (other.isEmpty) return true; | 83 if (other.isEmpty) return true; |
88 if ((start < 0) || (start >= this.length)) { | 84 if ((start < 0) || (start >= this.length)) { |
89 return false; | 85 return false; |
90 } | 86 } |
91 final int len = other.length; | 87 final int len = other.length; |
92 if ((start + len) > this.length) { | 88 if ((start + len) > this.length) { |
93 return false; | 89 return false; |
94 } | 90 } |
95 for (int i = 0; i < len; i++) { | 91 for (int i = 0; i < len; i++) { |
96 if (this.charCodeAt(i + start) != other.charCodeAt(i)) { | 92 if (this.codeUnitAt(i + start) != other.codeUnitAt(i)) { |
97 return false; | 93 return false; |
98 } | 94 } |
99 } | 95 } |
100 return true; | 96 return true; |
101 } | 97 } |
102 | 98 |
103 bool endsWith(String other) { | 99 bool endsWith(String other) { |
104 return _substringMatches(this.length - other.length, other); | 100 return _substringMatches(this.length - other.length, other); |
105 } | 101 } |
106 | 102 |
(...skipping 48 matching lines...) | |
155 return _substringUnchecked(startIndex, endIndex); | 151 return _substringUnchecked(startIndex, endIndex); |
156 } | 152 } |
157 | 153 |
158 String _substringUnchecked(int startIndex, int endIndex) | 154 String _substringUnchecked(int startIndex, int endIndex) |
159 native "StringBase_substringUnchecked"; | 155 native "StringBase_substringUnchecked"; |
160 | 156 |
161 String trim() { | 157 String trim() { |
162 final int len = this.length; | 158 final int len = this.length; |
163 int first = 0; | 159 int first = 0; |
164 for (; first < len; first++) { | 160 for (; first < len; first++) { |
165 if (!_isWhitespace(this.charCodeAt(first))) { | 161 // There are no whitespace characters that are outside the BMP so we |
162 // can use code units here for efficiency. | |
163 if (!_isWhitespace(this.codeUnitAt(first))) { | |
166 break; | 164 break; |
167 } | 165 } |
168 } | 166 } |
169 if (len == first) { | 167 if (len == first) { |
170 // String contains only whitespaces. | 168 // String contains only whitespaces. |
171 return ""; | 169 return ""; |
172 } | 170 } |
173 int last = len - 1; | 171 int last = len - 1; |
174 for (; last >= first; last--) { | 172 for (; last >= first; last--) { |
175 if (!_isWhitespace(this.charCodeAt(last))) { | 173 if (!_isWhitespace(this.codeUnitAt(last))) { |
176 break; | 174 break; |
177 } | 175 } |
178 } | 176 } |
179 if ((first == 0) && (last == (len - 1))) { | 177 if ((first == 0) && (last == (len - 1))) { |
180 // Returns this string if it does not have leading or trailing | 178 // Returns this string if it does not have leading or trailing |
181 // whitespaces. | 179 // whitespaces. |
182 return this; | 180 return this; |
183 } else { | 181 } else { |
184 return _substringUnchecked(first, last + 1); | 182 return _substringUnchecked(first, last + 1); |
185 } | 183 } |
(...skipping 100 matching lines...) | |
286 if (startIndex == endIndex && endIndex == previousIndex) { | 284 if (startIndex == endIndex && endIndex == previousIndex) { |
287 ++startIndex; // empty match, advance and restart | 285 ++startIndex; // empty match, advance and restart |
288 continue; | 286 continue; |
289 } | 287 } |
290 result.add(this.substring(previousIndex, match.start)); | 288 result.add(this.substring(previousIndex, match.start)); |
291 startIndex = previousIndex = endIndex; | 289 startIndex = previousIndex = endIndex; |
292 } | 290 } |
293 return result; | 291 return result; |
294 } | 292 } |
295 | 293 |
294 // TODO(erikcorry): Fix this to use the new code point iterator when it is | |
295 // available. | |
296 List<String> splitChars() { | 296 List<String> splitChars() { |
297 int len = this.length; | 297 int len = this.length; |
298 final result = new List<String>(len); | 298 final result = new List<String>(len); |
299 int i, j; | |
300 for (i = j = 0; i < len; i++, j++) { | |
301 int c = charCodeAt(i); | |
302 // Check for non-basic plane character encoded as a UTF-16 surrogate pair. | |
303 if (c >= String.SMP_CODE_POINT_BASE) { | |
304 i++; | |
305 } | |
306 result[j] = new String.fromCharCodes([c]); | |
307 } | |
308 if (i == j) return result; | |
309 // If we saw some non-basic plane characters, then we have to return a | |
310 // slightly smaller array than expected (we can't trim the original one | |
311 // because it is non-extendable). This rarely happens so this is preferable | |
312 // to having a separate pass over the string to count the code points. | |
313 final newResult = new List<String>(j); | |
314 for (i = 0; i < j; i++) newResult[i] = result[i]; | |
315 return newResult; | |
siva
2012/11/16 22:32:04
This piece of code is repeated 3 times in this CL,
erikcorry
2012/11/19 12:40:41
One copy removed, last three lines changed to a ge
| |
316 } | |
317 | |
318 List<int> get codeUnits { | |
319 int len = this.length; | |
320 final result = new List<int>(len); | |
299 for (int i = 0; i < len; i++) { | 321 for (int i = 0; i < len; i++) { |
300 result[i] = this[i]; | 322 result[i] = this.codeUnitAt(i); |
301 } | 323 } |
302 return result; | 324 return result; |
303 } | 325 } |
304 | |
305 List<int> get charCodes { | |
306 int len = this.length; | |
307 final result = new List<int>(len); | |
308 for (int i = 0; i < len; i++) { | |
309 result[i] = this.charCodeAt(i); | |
310 } | |
311 return result; | |
312 } | |
313 | 326 |
314 String toUpperCase() native "String_toUpperCase"; | 327 String toUpperCase() native "String_toUpperCase"; |
315 | 328 |
316 String toLowerCase() native "String_toLowerCase"; | 329 String toLowerCase() native "String_toLowerCase"; |
317 | 330 |
318 // Implementations of Strings methods follow below. | 331 // Implementations of Strings methods follow below. |
319 static String join(List<String> strings, String separator) { | 332 static String join(List<String> strings, String separator) { |
320 final int length = strings.length; | 333 final int length = strings.length; |
321 if (length == 0) { | 334 if (length == 0) { |
322 return ""; | 335 return ""; |
(...skipping 30 matching lines...) | |
353 native "Strings_concatAll"; | 366 native "Strings_concatAll"; |
354 } | 367 } |
355 | 368 |
356 | 369 |
357 class _OneByteString extends _StringBase implements String { | 370 class _OneByteString extends _StringBase implements String { |
358 factory _OneByteString._uninstantiable() { | 371 factory _OneByteString._uninstantiable() { |
359 throw new UnsupportedError( | 372 throw new UnsupportedError( |
360 "_OneByteString can only be allocated by the VM"); | 373 "_OneByteString can only be allocated by the VM"); |
361 } | 374 } |
362 | 375 |
363 // Checks for one-byte whitespaces only. | |
364 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | |
365 // whitespaces for one byte strings. | |
366 bool _isWhitespace(int codePoint) { | 376 bool _isWhitespace(int codePoint) { |
367 return | 377 return |
368 (codePoint == 32) || // Space. | 378 (codePoint == 32) || // Space. |
379 (codePoint == 0xa0) || // No-break space. | |
369 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 380 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
370 } | 381 } |
371 | 382 |
383 int charCodeAt(int index) => codeUnitAt(index); | |
384 | |
385 List<int> get charCodes => codeUnits; | |
372 } | 386 } |
373 | 387 |
374 | 388 |
375 class _TwoByteString extends _StringBase implements String { | 389 class _TwoByteStringBase extends _StringBase { |
siva
2012/11/16 22:32:04
Maybe add a TODO here to get rid of this class and
erikcorry
2012/11/19 12:40:41
I don't see why we would want to do that.
| |
390 factory _TwoByteStringBase._uninstantiable() { | |
391 throw new UnsupportedError( | |
392 "_TwoByteStringBase can't be instaniated"); | |
393 } | |
394 | |
395 // Works for both code points and code units since all spaces are in the BMP. | |
396 bool _isWhitespace(int codePoint) { | |
397 return | |
398 (codePoint == 32) || // Space. | |
399 (codePoint == 0xa0) || // No-break space. | |
400 ((9 <= codePoint) && (codePoint <= 13)) || // CR, LF, TAB, etc. | |
401 (codePoint >= 0x1680 && // Optimization. | |
402 (codePoint == 0x1680 || // Ogham space mark. | |
403 codePoint == 0x180e || // Mongolian vowel separator. | |
404 (codePoint >= 0x2000 && codePoint <= 0x200a) || // Wide/narrow spaces. | |
405 codePoint == 0x2028 || // Line separator. | |
406 codePoint == 0x2029 || // Paragraph separator. | |
407 codePoint == 0x202f || // Narrow no-break space. | |
408 codePoint == 0x205f || // Medium mathematical space. | |
409 codePoint == 0x3000 || // Ideographic space. | |
410 codePoint == 0xfeff)); // BOM code. | |
411 } | |
412 | |
413 int charCodeAt(int index) { | |
414 const int LEAD_SURROGATE_BASE = 0xd800; | |
415 const int LEAD_SURROGATE_END = 0xdbff; | |
416 const int TRAIL_SURROGATE_BASE = 0xdc00; | |
417 const int TRAIL_SURROGATE_END = 0xdfff; | |
418 const int MASK = 0x3ff; | |
419 int code = codeUnitAt(index); | |
420 if (code < LEAD_SURROGATE_BASE || code > LEAD_SURROGATE_END) return code; | |
421 if (index + 1 >= length) return code; | |
422 int trail = codeUnitAt(index + 1); | |
423 if (trail < TRAIL_SURROGATE_BASE || trail > TRAIL_SURROGATE_END) { | |
424 return code; | |
425 } | |
426 return String.SMP_CODE_POINT_BASE + ((code & MASK) << 10) + (trail & MASK); | |
427 } | |
428 | |
429 List<int> get charCodes { | |
siva
2012/11/16 22:32:04
TODO, fix this to use the new code point iterator
erikcorry
2012/11/19 12:40:41
Done.
| |
430 int len = this.length; | |
431 final result = new List<int>(len); | |
432 int i, j; | |
433 for (i = j = 0; i < len; i++, j++) { | |
434 int c = this.charCodeAt(i); | |
435 // Check for supplementary plane character encoded as a UTF-16 surrogate | |
436 // pair. | |
437 if (c >= String.SMP_CODE_POINT_BASE) { | |
438 i++; | |
439 } | |
440 result[j] = c; | |
441 } | |
442 if (i == j) return result; | |
siva
2012/11/16 22:32:04
I find the (i == j) condition here a little unread
erikcorry
2012/11/19 12:40:41
Done.
| |
443 // If we saw some non-basic plane characters, then we have to return a | |
444 // slightly smaller array than expected (we can't trim the original one | |
445 // because it is non-extendable). This rarely happens so this is preferable | |
446 // to having a separate pass over the string to count the code points. | |
447 final newResult = new List<int>(j); | |
448 for (i = 0; i < j; i++) newResult[i] = result[i]; | |
449 return newResult; | |
450 } | |
siva
2012/11/16 22:32:04
This code here and in splitChars above seem to be
erikcorry
2012/11/19 12:40:41
Last three lines fixed to use getRange, but splitC
| |
451 } | |
452 | |
453 | |
454 class _TwoByteString extends _TwoByteStringBase implements String { | |
376 factory _TwoByteString._uninstantiable() { | 455 factory _TwoByteString._uninstantiable() { |
377 throw new UnsupportedError( | 456 throw new UnsupportedError( |
378 "_TwoByteString can only be allocated by the VM"); | 457 "_TwoByteString can only be allocated by the VM"); |
379 } | 458 } |
380 | |
381 // Checks for one-byte whitespaces only. | |
382 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | |
383 // whitespaces. Add checking for multi-byte whitespace codepoints. | |
384 bool _isWhitespace(int codePoint) { | |
385 return | |
386 (codePoint == 32) || // Space. | |
387 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | |
388 } | |
389 } | 459 } |
390 | 460 |
391 | 461 |
392 class _FourByteString extends _StringBase implements String { | 462 // TODO(erikcorry): This is going away. |
siva
2012/11/16 22:32:04
(why not remove it in this CL itself)?
erikcorry
2012/11/19 12:40:41
Done.
| |
463 class _FourByteString extends _StringBase { | |
393 factory _FourByteString._uninstantiable() { | 464 factory _FourByteString._uninstantiable() { |
394 throw new UnsupportedError( | 465 throw new UnsupportedError( |
395 "_FourByteString can only be allocated by the VM"); | 466 "_FourByteString can only be allocated by the VM"); |
396 } | 467 } |
397 | 468 |
398 // Checks for one-byte whitespaces only. | 469 // Checks for one-byte whitespaces only. |
399 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | 470 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
400 // whitespaces. Add checking for multi-byte whitespace codepoints. | 471 // whitespaces. Add checking for multi-byte whitespace codepoints. |
401 bool _isWhitespace(int codePoint) { | 472 bool _isWhitespace(int codePoint) { |
402 return | 473 return |
403 (codePoint == 32) || // Space. | 474 (codePoint == 32) || // Space. |
404 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 475 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
405 } | 476 } |
406 } | 477 } |
407 | 478 |
408 | 479 |
409 class _ExternalOneByteString extends _StringBase implements String { | 480 class _ExternalOneByteString extends _StringBase implements String { |
410 factory _ExternalOneByteString._uninstantiable() { | 481 factory _ExternalOneByteString._uninstantiable() { |
411 throw new UnsupportedError( | 482 throw new UnsupportedError( |
412 "_ExternalOneByteString can only be allocated by the VM"); | 483 "_ExternalOneByteString can only be allocated by the VM"); |
413 } | 484 } |
414 | 485 |
415 // Checks for one-byte whitespaces only. | |
416 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | |
417 // whitespaces for one byte strings. | |
418 bool _isWhitespace(int codePoint) { | 486 bool _isWhitespace(int codePoint) { |
419 return | 487 return |
420 (codePoint == 32) || // Space. | 488 (codePoint == 32) || // Space. |
489 (codePoint == 0xa0) || // No-break space. | |
421 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 490 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
422 } | 491 } |
492 | |
493 int charCodeAt(int index) => codeUnitAt(index); | |
494 | |
495 List<int> get charCodes => codeUnits; | |
496 } | |
497 | |
498 | |
499 class _ExternalTwoByteString extends _TwoByteStringBase implements String { | |
500 factory _ExternalTwoByteString._uninstantiable() { | |
501 throw new UnsupportedError( | |
502 "_ExternalTwoByteString can only be allocated by the VM"); | |
503 } | |
423 } | 504 } |
424 | 505 |
425 | 506 |
426 class _ExternalTwoByteString extends _StringBase implements String { | 507 // TODO(erikcorry): This is going away. |
siva
2012/11/16 22:32:04
Ditto comment.
erikcorry
2012/11/19 12:40:41
Done.
| |
427 factory _ExternalTwoByteString._uninstantiable() { | 508 class _ExternalFourByteString extends _StringBase { |
428 throw new UnsupportedError( | |
429 "_ExternalTwoByteString can only be allocated by the VM"); | |
430 } | |
431 | |
432 // Checks for one-byte whitespaces only. | |
433 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | |
434 // whitespaces. Add checking for multi-byte whitespace codepoints. | |
435 bool _isWhitespace(int codePoint) { | |
436 return | |
437 (codePoint == 32) || // Space. | |
438 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | |
439 } | |
440 } | |
441 | |
442 | |
443 class _ExternalFourByteString extends _StringBase implements String { | |
444 factory _ExternalFourByteString._uninstantiable() { | 509 factory _ExternalFourByteString._uninstantiable() { |
445 throw new UnsupportedError( | 510 throw new UnsupportedError( |
446 "ExternalFourByteString can only be allocated by the VM"); | 511 "ExternalFourByteString can only be allocated by the VM"); |
447 } | 512 } |
448 | 513 |
449 // Checks for one-byte whitespaces only. | 514 // Checks for one-byte whitespaces only. |
450 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | 515 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
451 // whitespaces. Add checking for multi-byte whitespace codepoints. | 516 // whitespaces. Add checking for multi-byte whitespace codepoints. |
452 bool _isWhitespace(int codePoint) { | 517 bool _isWhitespace(int codePoint) { |
453 return | 518 return |
(...skipping 24 matching lines...) | |
478 for (int g in groups) { | 543 for (int g in groups) { |
479 result.add(group(g)); | 544 result.add(group(g)); |
480 } | 545 } |
481 return result; | 546 return result; |
482 } | 547 } |
483 | 548 |
484 final int start; | 549 final int start; |
485 final String str; | 550 final String str; |
486 final String pattern; | 551 final String pattern; |
487 } | 552 } |
OLD | NEW |