Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 /** | 5 /** |
| 6 * [_StringBase] contains common methods used by concrete String | 6 * [_StringBase] contains common methods used by concrete String |
| 7 * implementations, e.g., _OneByteString. | 7 * implementations, e.g., _OneByteString. |
| 8 */ | 8 */ |
| 9 class _StringBase { | 9 class _StringBase { |
| 10 | 10 |
| 11 factory _StringBase._uninstantiable() { | 11 factory _StringBase._uninstantiable() { |
| 12 throw new UnsupportedError( | 12 throw new UnsupportedError( |
| 13 "_StringBase can't be instaniated"); | 13 "_StringBase can't be instaniated"); |
| 14 } | 14 } |
| 15 | 15 |
| 16 int get hashCode native "String_getHashCode"; | 16 int get hashCode native "String_getHashCode"; |
| 17 | 17 |
| 18 /** | 18 /** |
| 19 * Create the most efficient string representation for specified | 19 * Create the most efficient string representation for specified |
| 20 * [codePoints]. | 20 * [codePoints]. |
| 21 */ | 21 */ |
| 22 static String createFromCharCodes(List<int> charCodes) { | 22 static String createFromCharCodes(List<int> codePoints) { |
| 23 _ObjectArray objectArray; | 23 _ObjectArray objectArray; |
| 24 if (charCodes is _ObjectArray) { | 24 if (codePoints is _ObjectArray) { |
| 25 objectArray = charCodes; | 25 objectArray = codePoints; |
| 26 } else { | 26 } else { |
| 27 int len = charCodes.length; | 27 int len = codePoints.length; |
| 28 objectArray = new _ObjectArray(len); | 28 objectArray = new _ObjectArray(len); |
| 29 for (int i = 0; i < len; i++) { | 29 for (int i = 0; i < len; i++) { |
| 30 objectArray[i] = charCodes[i]; | 30 objectArray[i] = codePoints[i]; |
| 31 } | 31 } |
| 32 } | 32 } |
| 33 return _createFromCodePoints(objectArray); | 33 return _createFromCodePoints(objectArray); |
| 34 } | 34 } |
| 35 | 35 |
| 36 static String _createFromCodePoints(List<int> codePoints) | 36 static String _createFromCodePoints(List<int> codePoints) |
| 37 native "StringBase_createFromCodePoints"; | 37 native "StringBase_createFromCodePoints"; |
| 38 | 38 |
| 39 static String createFromCodeUnits(List<int> codeUnits) { | |
| 40 _ObjectArray objectArray; | |
| 41 if (codeUnits is _ObjectArray) { | |
| 42 objectArray = codeUnits; | |
| 43 } else { | |
| 44 int len = codeUnits.length; | |
| 45 objectArray = new _ObjectArray(len); | |
| 46 for (int i = 0; i < len; i++) { | |
| 47 objectArray[i] = codeUnits[i]; | |
| 48 } | |
| 49 } | |
| 50 return _createFromCodeUnits(objectArray); | |
| 51 } | |
| 52 | |
| 53 static String _createFromCodeUnits(List<int> codeUnits) | |
| 54 native "StringBase_createFromCodeUnits"; | |
| 55 | |
| 39 String operator [](int index) native "String_charAt"; | 56 String operator [](int index) native "String_charAt"; |
| 40 | 57 |
| 41 int charCodeAt(int index) native "String_charCodeAt"; | 58 int charCodeAt(int index) native "String_charCodeAt"; |
| 42 | 59 |
| 60 int codeUnitAt(int index) native "String_codeUnitAt"; | |
| 61 | |
| 43 int get length native "String_getLength"; | 62 int get length native "String_getLength"; |
| 44 | 63 |
| 45 bool get isEmpty { | 64 bool get isEmpty { |
| 46 return this.length === 0; | 65 return this.length === 0; |
| 47 } | 66 } |
| 48 | 67 |
| 49 String concat(String other) native "String_concat"; | 68 String concat(String other) native "String_concat"; |
| 50 | 69 |
| 51 String toString() { | 70 String toString() { |
| 52 return this; | 71 return this; |
| 53 } | 72 } |
| 54 | 73 |
| 55 bool operator ==(Object other) { | 74 bool operator ==(Object other) { |
| 56 if (this === other) { | 75 if (this === other) { |
| 57 return true; | 76 return true; |
| 58 } | 77 } |
| 59 if ((other is !String) || | 78 if ((other is !String) || |
| 60 (this.length != other.length)) { | 79 (this.length != other.length)) { |
| 61 // TODO(5413632): Compare hash codes when both are present. | 80 // TODO(5413632): Compare hash codes when both are present. |
| 62 return false; | 81 return false; |
| 63 } | 82 } |
| 64 return this.compareTo(other) === 0; | 83 return this.compareTo(other) === 0; |
| 65 } | 84 } |
| 66 | 85 |
| 67 int compareTo(String other) { | 86 int compareTo(String other) { |
| 68 int thisLength = this.length; | 87 int thisLength = this.length; |
| 69 int otherLength = other.length; | 88 int otherLength = other.length; |
| 70 int len = (thisLength < otherLength) ? thisLength : otherLength; | 89 int len = (thisLength < otherLength) ? thisLength : otherLength; |
| 71 for (int i = 0; i < len; i++) { | 90 for (int i = 0; i < len; i++) { |
| 72 int thisCodePoint = this.charCodeAt(i); | 91 int thisCodeUnit = this.codeUnitAt(i); |
| 73 int otherCodePoint = other.charCodeAt(i); | 92 int otherCodeUnit = other.codeUnitAt(i); |
| 74 if (thisCodePoint < otherCodePoint) { | 93 if (thisCodeUnit < otherCodeUnit) { |
| 75 return -1; | 94 return -1; |
| 76 } | 95 } |
| 77 if (thisCodePoint > otherCodePoint) { | 96 if (thisCodeUnit > otherCodeUnit) { |
| 78 return 1; | 97 return 1; |
| 79 } | 98 } |
| 80 } | 99 } |
| 81 if (thisLength < otherLength) return -1; | 100 if (thisLength < otherLength) return -1; |
| 82 if (thisLength > otherLength) return 1; | 101 if (thisLength > otherLength) return 1; |
| 83 return 0; | 102 return 0; |
| 84 } | 103 } |
| 85 | 104 |
| 86 bool _substringMatches(int start, String other) { | 105 bool _substringMatches(int start, String other) { |
| 87 if (other.isEmpty) return true; | 106 if (other.isEmpty) return true; |
| 88 if ((start < 0) || (start >= this.length)) { | 107 if ((start < 0) || (start >= this.length)) { |
| 89 return false; | 108 return false; |
| 90 } | 109 } |
| 91 final int len = other.length; | 110 final int len = other.length; |
| 92 if ((start + len) > this.length) { | 111 if ((start + len) > this.length) { |
| 93 return false; | 112 return false; |
| 94 } | 113 } |
| 95 for (int i = 0; i < len; i++) { | 114 for (int i = 0; i < len; i++) { |
| 96 if (this.charCodeAt(i + start) != other.charCodeAt(i)) { | 115 if (this.codeUnitAt(i + start) != other.codeUnitAt(i)) { |
| 97 return false; | 116 return false; |
| 98 } | 117 } |
| 99 } | 118 } |
| 100 return true; | 119 return true; |
| 101 } | 120 } |
| 102 | 121 |
| 103 bool endsWith(String other) { | 122 bool endsWith(String other) { |
| 104 return _substringMatches(this.length - other.length, other); | 123 return _substringMatches(this.length - other.length, other); |
| 105 } | 124 } |
| 106 | 125 |
| (...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 155 return _substringUnchecked(startIndex, endIndex); | 174 return _substringUnchecked(startIndex, endIndex); |
| 156 } | 175 } |
| 157 | 176 |
| 158 String _substringUnchecked(int startIndex, int endIndex) | 177 String _substringUnchecked(int startIndex, int endIndex) |
| 159 native "StringBase_substringUnchecked"; | 178 native "StringBase_substringUnchecked"; |
| 160 | 179 |
| 161 String trim() { | 180 String trim() { |
| 162 final int len = this.length; | 181 final int len = this.length; |
| 163 int first = 0; | 182 int first = 0; |
| 164 for (; first < len; first++) { | 183 for (; first < len; first++) { |
| 165 if (!_isWhitespace(this.charCodeAt(first))) { | 184 // There are no whitespace characters that are outside the BMP so we |
| 185 // can use code units here for efficiency. | |
| 186 if (!_isWhitespace(this.codeUnitAt(first))) { | |
| 166 break; | 187 break; |
| 167 } | 188 } |
| 168 } | 189 } |
| 169 if (len == first) { | 190 if (len == first) { |
| 170 // String contains only whitespaces. | 191 // String contains only whitespaces. |
| 171 return ""; | 192 return ""; |
| 172 } | 193 } |
| 173 int last = len - 1; | 194 int last = len - 1; |
| 174 for (; last >= first; last--) { | 195 for (; last >= first; last--) { |
| 175 if (!_isWhitespace(this.charCodeAt(last))) { | 196 if (!_isWhitespace(this.codeUnitAt(last))) { |
| 176 break; | 197 break; |
| 177 } | 198 } |
| 178 } | 199 } |
| 179 if ((first == 0) && (last == (len - 1))) { | 200 if ((first == 0) && (last == (len - 1))) { |
| 180 // Returns this string if it does not have leading or trailing | 201 // Returns this string if it does not have leading or trailing |
| 181 // whitespaces. | 202 // whitespaces. |
| 182 return this; | 203 return this; |
| 183 } else { | 204 } else { |
| 184 return _substringUnchecked(first, last + 1); | 205 return _substringUnchecked(first, last + 1); |
| 185 } | 206 } |
| (...skipping 100 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 286 if (startIndex == endIndex && endIndex == previousIndex) { | 307 if (startIndex == endIndex && endIndex == previousIndex) { |
| 287 ++startIndex; // empty match, advance and restart | 308 ++startIndex; // empty match, advance and restart |
| 288 continue; | 309 continue; |
| 289 } | 310 } |
| 290 result.add(this.substring(previousIndex, match.start)); | 311 result.add(this.substring(previousIndex, match.start)); |
| 291 startIndex = previousIndex = endIndex; | 312 startIndex = previousIndex = endIndex; |
| 292 } | 313 } |
| 293 return result; | 314 return result; |
| 294 } | 315 } |
| 295 | 316 |
| 317 // TODO(erikcorry): Fix this to use the new code point iterator when it is | |
| 318 // available. | |
| 296 List<String> splitChars() { | 319 List<String> splitChars() { |
| 297 int len = this.length; | 320 int len = this.length; |
| 298 final result = new List<String>(len); | 321 final result = new List<String>(len); |
| 299 for (int i = 0; i < len; i++) { | 322 int i, j; |
| 300 result[i] = this[i]; | 323 for (i = j = 0; i < len; i++, j++) { |
| 324 int c = charCodeAt(i); | |
| 325 // Check for non-basic plane character encoded as a UTF-16 surrogate pair. | |
| 326 if (c > 0xffff) { | **floitsch** (2012/11/08 15:28:21): Can't you use Utf16::IsSurrogate(c)?<br>**erikcorry** (2012/11/15 13:28:25): No, that's a C++ function. I added some named con… |
| 327 i++; | |
| 328 } | |
| 329 result[j] = new String.fromCharCodes([c]); | |
| 301 } | 330 } |
| 302 return result; | 331 if (i == j) return result; |
| 332 // If we saw some non-basic plane characters, then we have to return a | |
| 333 // slightly smaller array than expected (we can't trim the original one | |
| 334 // because it is non-extendable). This rarely happens so this is preferable | |
| 335 // to having a separate pass over the string to count the code points. | |
| 336 final newResult = new List<String>(j); | |
| 337 for (i = 0; i < j; i++) newResult[i] = result[i]; | |
| 338 return newResult; | |
| 303 } | 339 } |
| 304 | 340 |
| 305 List<int> get charCodes { | 341 List<int> get charCodes { |
| 306 int len = this.length; | 342 int len = this.length; |
| 307 final result = new List<int>(len); | 343 final result = new List<int>(len); |
| 344 int i, j; | |
| 345 for (i = j = 0; i < len; i++, j++) { | |
| 346 int c = this.charCodeAt(i); | |
| 347 // Check for non-basic plane character encoded as a UTF-16 surrogate pair. | |
| 348 if (c > 0xffff) { | |
| 349 i++; | |
| 350 } | |
| 351 result[j] = c; | |
| 352 } | |
| 353 if (i == j) return result; | |
| 354 // If we saw some non-basic plane characters, then we have to return a | |
| 355 // slightly smaller array than expected (we can't trim the original one | |
| 356 // because it is non-extendable). This rarely happens so this is preferable | |
| 357 // to having a separate pass over the string to count the code points. | |
| 358 final newResult = new List<int>(j); | |
| 359 for (i = 0; i < j; i++) newResult[i] = result[i]; | |
| 360 return newResult; | |
| 361 } | |
| 362 | |
| 363 List<int> get codeUnits { | |
| 364 int len = this.length; | |
| 365 final result = new List<int>(len); | |
| 308 for (int i = 0; i < len; i++) { | 366 for (int i = 0; i < len; i++) { |
| 309 result[i] = this.charCodeAt(i); | 367 result[i] = this.codeUnitAt(i); |
| 310 } | 368 } |
| 311 return result; | 369 return result; |
| 312 } | 370 } |
| 313 | 371 |
| 314 String toUpperCase() native "String_toUpperCase"; | 372 String toUpperCase() native "String_toUpperCase"; |
| 315 | 373 |
| 316 String toLowerCase() native "String_toLowerCase"; | 374 String toLowerCase() native "String_toLowerCase"; |
| 317 | 375 |
| 318 // Implementations of Strings methods follow below. | 376 // Implementations of Strings methods follow below. |
| 319 static String join(List<String> strings, String separator) { | 377 static String join(List<String> strings, String separator) { |
| (...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 353 native "Strings_concatAll"; | 411 native "Strings_concatAll"; |
| 354 } | 412 } |
| 355 | 413 |
| 356 | 414 |
| 357 class _OneByteString extends _StringBase implements String { | 415 class _OneByteString extends _StringBase implements String { |
| 358 factory _OneByteString._uninstantiable() { | 416 factory _OneByteString._uninstantiable() { |
| 359 throw new UnsupportedError( | 417 throw new UnsupportedError( |
| 360 "_OneByteString can only be allocated by the VM"); | 418 "_OneByteString can only be allocated by the VM"); |
| 361 } | 419 } |
| 362 | 420 |
| 363 // Checks for one-byte whitespaces only. | |
| 364 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | |
| 365 // whitespaces for one byte strings. | |
| 366 bool _isWhitespace(int codePoint) { | 421 bool _isWhitespace(int codePoint) { |
| 367 return | 422 return |
| 368 (codePoint === 32) || // Space. | 423 (codePoint == 32) || // Space. |
| 424 (codePoint == 0xa0) || // No-break space. | |
| 369 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 425 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
| 370 } | 426 } |
| 371 | 427 |
| 372 } | 428 } |
| 373 | 429 |
| 374 | 430 |
| 375 class _TwoByteString extends _StringBase implements String { | 431 class _TwoByteString extends _StringBase implements String { |
| 376 factory _TwoByteString._uninstantiable() { | 432 factory _TwoByteString._uninstantiable() { |
| 377 throw new UnsupportedError( | 433 throw new UnsupportedError( |
| 378 "_TwoByteString can only be allocated by the VM"); | 434 "_TwoByteString can only be allocated by the VM"); |
| 379 } | 435 } |
| 380 | 436 |
| 381 // Checks for one-byte whitespaces only. | 437 // Works for both code points and code units since all spaces are in the BMP. |
| 382 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | |
| 383 // whitespaces. Add checking for multi-byte whitespace codepoints. | |
| 384 bool _isWhitespace(int codePoint) { | 438 bool _isWhitespace(int codePoint) { |
| 385 return | 439 return |
| 386 (codePoint === 32) || // Space. | 440 (codePoint == 32) || // Space. |
| 387 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 441 (codePoint == 0xa0) || // No-break space. |
| 442 ((9 <= codePoint) && (codePoint <= 13)) || // CR, LF, TAB, etc. | |
| 443 (codePoint >= 0x1680 && // Optimization. | |
| 444 (codePoint == 0x1680 || // Ogham space mark. | |
| 445 codePoint == 0x180e || // Mongolian vowel separator. | |
| 446 (codePoint >= 0x2000 && codePoint <= 0x200a) || // Wide/narrow spaces. | |
| 447 codePoint == 0x202f || // Narrow no-break space. | |
| 448 codePoint == 0x205f || // Medium mathematical space. | |
| 449 codePoint == 0x3000)); // Ideographic space. | |
| 388 } | 450 } |
| 389 } | 451 } |
| 390 | 452 |
| 391 | 453 |
| 454 // TODO(erikcorry): This is going away. | |
| 392 class _FourByteString extends _StringBase implements String { | 455 class _FourByteString extends _StringBase implements String { |
| 393 factory _FourByteString._uninstantiable() { | 456 factory _FourByteString._uninstantiable() { |
| 394 throw new UnsupportedError( | 457 throw new UnsupportedError( |
| 395 "_FourByteString can only be allocated by the VM"); | 458 "_FourByteString can only be allocated by the VM"); |
| 396 } | 459 } |
| 397 | 460 |
| 398 // Checks for one-byte whitespaces only. | 461 // Checks for one-byte whitespaces only. |
| 399 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | 462 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
| 400 // whitespaces. Add checking for multi-byte whitespace codepoints. | 463 // whitespaces. Add checking for multi-byte whitespace codepoints. |
| 401 bool _isWhitespace(int codePoint) { | 464 bool _isWhitespace(int codePoint) { |
| 402 return | 465 return |
| 403 (codePoint === 32) || // Space. | 466 (codePoint === 32) || // Space. |
| 404 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 467 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
| 405 } | 468 } |
| 406 } | 469 } |
| 407 | 470 |
| 408 | 471 |
| 409 class _ExternalOneByteString extends _StringBase implements String { | 472 class _ExternalOneByteString extends _StringBase implements String { |
| 410 factory _ExternalOneByteString._uninstantiable() { | 473 factory _ExternalOneByteString._uninstantiable() { |
| 411 throw new UnsupportedError( | 474 throw new UnsupportedError( |
| 412 "_ExternalOneByteString can only be allocated by the VM"); | 475 "_ExternalOneByteString can only be allocated by the VM"); |
| 413 } | 476 } |
| 414 | 477 |
| 415 // Checks for one-byte whitespaces only. | |
| 416 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | |
| 417 // whitespaces for one byte strings. | |
| 418 bool _isWhitespace(int codePoint) { | 478 bool _isWhitespace(int codePoint) { |
| 419 return | 479 return |
| 420 (codePoint === 32) || // Space. | 480 (codePoint == 32) || // Space. |
| 481 (codePoint == 0xa0) || // No-break space. | |
| 421 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 482 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. |
| 422 } | 483 } |
| 423 } | 484 } |
| 424 | 485 |
| 425 | 486 |
| 426 class _ExternalTwoByteString extends _StringBase implements String { | 487 class _ExternalTwoByteString extends _StringBase implements String { |
| 427 factory ExternalTwoByteString._uninstantiable() { | 488 factory ExternalTwoByteString._uninstantiable() { |
| 428 throw new UnsupportedError( | 489 throw new UnsupportedError( |
| 429 "_ExternalTwoByteString can only be allocated by the VM"); | 490 "_ExternalTwoByteString can only be allocated by the VM"); |
| 430 } | 491 } |
| 431 | 492 |
| 432 // Checks for one-byte whitespaces only. | 493 // Works for both code points and code units since all spaces are in the BMP. |
| 433 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | |
| 434 // whitespaces. Add checking for multi-byte whitespace codepoints. | |
| 435 bool _isWhitespace(int codePoint) { | 494 bool _isWhitespace(int codePoint) { |
| 436 return | 495 return |
| 437 (codePoint === 32) || // Space. | 496 (codePoint == 32) || // Space. |
| 438 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. | 497 (codePoint == 0xa0) || // No-break space. |
| 498 ((9 <= codePoint) && (codePoint <= 13)) || // CR, LF, TAB, etc. | |
| 499 (codePoint >= 0x1680 && // Optimization. | |
| 500 (codePoint == 0x1680 || // Ogham space mark. | |
| 501 codePoint == 0x180e || // Mongolian vowel separator. | |
| 502 (codePoint >= 0x2000 && codePoint <= 0x200a) || // Wide/narrow spaces. | |
| 503 codePoint == 0x202f || // Narrow no-break space. | |
| 504 codePoint == 0x205f || // Medium mathematical space. | |
| 505 codePoint == 0x3000)); // Ideographic space. | |
| 439 } | 506 } |
| 440 } | 507 } |
| 441 | 508 |
| 442 | 509 |
| 510 // TODO(erikcorry): This is going away. | |
| 443 class _ExternalFourByteString extends _StringBase implements String { | 511 class _ExternalFourByteString extends _StringBase implements String { |
| 444 factory _ExternalFourByteString._uninstantiable() { | 512 factory _ExternalFourByteString._uninstantiable() { |
| 445 throw new UnsupportedError( | 513 throw new UnsupportedError( |
| 446 "ExternalFourByteString can only be allocated by the VM"); | 514 "ExternalFourByteString can only be allocated by the VM"); |
| 447 } | 515 } |
| 448 | 516 |
| 449 // Checks for one-byte whitespaces only. | 517 // Checks for one-byte whitespaces only. |
| 450 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid | 518 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid |
| 451 // whitespaces. Add checking for multi-byte whitespace codepoints. | 519 // whitespaces. Add checking for multi-byte whitespace codepoints. |
| 452 bool _isWhitespace(int codePoint) { | 520 bool _isWhitespace(int codePoint) { |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 478 for (int g in groups) { | 546 for (int g in groups) { |
| 479 result.add(group(g)); | 547 result.add(group(g)); |
| 480 } | 548 } |
| 481 return result; | 549 return result; |
| 482 } | 550 } |
| 483 | 551 |
| 484 final int start; | 552 final int start; |
| 485 final String str; | 553 final String str; |
| 486 final String pattern; | 554 final String pattern; |
| 487 } | 555 } |
| OLD | NEW |