Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(311)

Side by Side Diff: runtime/lib/string_base.dart

Issue 11411092: Revert "Add some support for the code-point code-unit distinction." (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/lib/string.cc ('k') | runtime/lib/string_patch.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 /** 5 /**
6 * [_StringBase] contains common methods used by concrete String 6 * [_StringBase] contains common methods used by concrete String
7 * implementations, e.g., _OneByteString. 7 * implementations, e.g., _OneByteString.
8 */ 8 */
9 class _StringBase { 9 class _StringBase {
10 10
11 factory _StringBase._uninstantiable() { 11 factory _StringBase._uninstantiable() {
12 throw new UnsupportedError( 12 throw new UnsupportedError(
13 "_StringBase can't be instaniated"); 13 "_StringBase can't be instaniated");
14 } 14 }
15 15
16 int get hashCode native "String_getHashCode"; 16 int get hashCode native "String_getHashCode";
17 17
18 /** 18 /**
19 * Create the most efficient string representation for the specified UTF-16 19 * Create the most efficient string representation for specified
20 * [codeUnits]. 20 * [codePoints].
21 */ 21 */
22 static String createFromUtf16(List<int> codeUnits) { 22 static String createFromCharCodes(List<int> charCodes) {
23 _ObjectArray objectArray; 23 _ObjectArray objectArray;
24 if (codeUnits is _ObjectArray) { 24 if (charCodes is _ObjectArray) {
25 objectArray = codeUnits; 25 objectArray = charCodes;
26 } else { 26 } else {
27 int len = codeUnits.length; 27 int len = charCodes.length;
28 objectArray = new _ObjectArray(len); 28 objectArray = new _ObjectArray(len);
29 for (int i = 0; i < len; i++) { 29 for (int i = 0; i < len; i++) {
30 objectArray[i] = codeUnits[i]; 30 objectArray[i] = charCodes[i];
31 } 31 }
32 } 32 }
33 return _createFromUtf16(objectArray); 33 return _createFromCodePoints(objectArray);
34 } 34 }
35 35
36 static String _createFromUtf16(List<int> codeUnits) 36 static String _createFromCodePoints(List<int> codePoints)
37 native "StringBase_createFromUtf16"; 37 native "StringBase_createFromCodePoints";
38 38
39 String operator [](int index) native "String_charAt"; 39 String operator [](int index) native "String_charAt";
40 40
41 int codeUnitAt(int index) native "String_codeUnitAt"; 41 int charCodeAt(int index) native "String_charCodeAt";
42 42
43 int get length native "String_getLength"; 43 int get length native "String_getLength";
44 44
45 bool get isEmpty { 45 bool get isEmpty {
46 return this.length == 0; 46 return this.length == 0;
47 } 47 }
48 48
49 String concat(String other) native "String_concat"; 49 String concat(String other) native "String_concat";
50 50
51 String toString() { 51 String toString() {
(...skipping 10 matching lines...) Expand all
62 return false; 62 return false;
63 } 63 }
64 return this.compareTo(other) == 0; 64 return this.compareTo(other) == 0;
65 } 65 }
66 66
67 int compareTo(String other) { 67 int compareTo(String other) {
68 int thisLength = this.length; 68 int thisLength = this.length;
69 int otherLength = other.length; 69 int otherLength = other.length;
70 int len = (thisLength < otherLength) ? thisLength : otherLength; 70 int len = (thisLength < otherLength) ? thisLength : otherLength;
71 for (int i = 0; i < len; i++) { 71 for (int i = 0; i < len; i++) {
72 int thisCodeUnit = this.codeUnitAt(i); 72 int thisCodePoint = this.charCodeAt(i);
73 int otherCodeUnit = other.codeUnitAt(i); 73 int otherCodePoint = other.charCodeAt(i);
74 if (thisCodeUnit < otherCodeUnit) { 74 if (thisCodePoint < otherCodePoint) {
75 return -1; 75 return -1;
76 } 76 }
77 if (thisCodeUnit > otherCodeUnit) { 77 if (thisCodePoint > otherCodePoint) {
78 return 1; 78 return 1;
79 } 79 }
80 } 80 }
81 if (thisLength < otherLength) return -1; 81 if (thisLength < otherLength) return -1;
82 if (thisLength > otherLength) return 1; 82 if (thisLength > otherLength) return 1;
83 return 0; 83 return 0;
84 } 84 }
85 85
86 bool _substringMatches(int start, String other) { 86 bool _substringMatches(int start, String other) {
87 if (other.isEmpty) return true; 87 if (other.isEmpty) return true;
88 if ((start < 0) || (start >= this.length)) { 88 if ((start < 0) || (start >= this.length)) {
89 return false; 89 return false;
90 } 90 }
91 final int len = other.length; 91 final int len = other.length;
92 if ((start + len) > this.length) { 92 if ((start + len) > this.length) {
93 return false; 93 return false;
94 } 94 }
95 for (int i = 0; i < len; i++) { 95 for (int i = 0; i < len; i++) {
96 if (this.codeUnitAt(i + start) != other.codeUnitAt(i)) { 96 if (this.charCodeAt(i + start) != other.charCodeAt(i)) {
97 return false; 97 return false;
98 } 98 }
99 } 99 }
100 return true; 100 return true;
101 } 101 }
102 102
103 bool endsWith(String other) { 103 bool endsWith(String other) {
104 return _substringMatches(this.length - other.length, other); 104 return _substringMatches(this.length - other.length, other);
105 } 105 }
106 106
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
155 return _substringUnchecked(startIndex, endIndex); 155 return _substringUnchecked(startIndex, endIndex);
156 } 156 }
157 157
158 String _substringUnchecked(int startIndex, int endIndex) 158 String _substringUnchecked(int startIndex, int endIndex)
159 native "StringBase_substringUnchecked"; 159 native "StringBase_substringUnchecked";
160 160
161 String trim() { 161 String trim() {
162 final int len = this.length; 162 final int len = this.length;
163 int first = 0; 163 int first = 0;
164 for (; first < len; first++) { 164 for (; first < len; first++) {
165 // There are no whitespace characters that are outside the BMP so we 165 if (!_isWhitespace(this.charCodeAt(first))) {
166 // can use code units here for efficiency.
167 if (!_isWhitespace(this.codeUnitAt(first))) {
168 break; 166 break;
169 } 167 }
170 } 168 }
171 if (len == first) { 169 if (len == first) {
172 // String contains only whitespaces. 170 // String contains only whitespaces.
173 return ""; 171 return "";
174 } 172 }
175 int last = len - 1; 173 int last = len - 1;
176 for (; last >= first; last--) { 174 for (; last >= first; last--) {
177 if (!_isWhitespace(this.codeUnitAt(last))) { 175 if (!_isWhitespace(this.charCodeAt(last))) {
178 break; 176 break;
179 } 177 }
180 } 178 }
181 if ((first == 0) && (last == (len - 1))) { 179 if ((first == 0) && (last == (len - 1))) {
182 // Returns this string if it does not have leading or trailing 180 // Returns this string if it does not have leading or trailing
183 // whitespaces. 181 // whitespaces.
184 return this; 182 return this;
185 } else { 183 } else {
186 return _substringUnchecked(first, last + 1); 184 return _substringUnchecked(first, last + 1);
187 } 185 }
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 if (startIndex == endIndex && endIndex == previousIndex) { 286 if (startIndex == endIndex && endIndex == previousIndex) {
289 ++startIndex; // empty match, advance and restart 287 ++startIndex; // empty match, advance and restart
290 continue; 288 continue;
291 } 289 }
292 result.add(this.substring(previousIndex, match.start)); 290 result.add(this.substring(previousIndex, match.start));
293 startIndex = previousIndex = endIndex; 291 startIndex = previousIndex = endIndex;
294 } 292 }
295 return result; 293 return result;
296 } 294 }
297 295
298 // TODO(erikcorry): Fix this to use the new code point iterator when it is
299 // available.
300 List<String> splitChars() { 296 List<String> splitChars() {
301 int len = this.length; 297 int len = this.length;
302 final result = new List<String>(len); 298 final result = new List<String>(len);
303 bool supplementaryCharacterSeen = false; 299 for (int i = 0; i < len; i++) {
304 int i, j; 300 result[i] = this[i];
305 for (i = j = 0; i < len; i++, j++) {
306 int c = charCodeAt(i);
307 // Check for non-basic plane character encoded as a UTF-16 surrogate pair.
308 if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) {
309 i++;
310 supplementaryCharacterSeen = true;
311 }
312 result[j] = new String.fromCharCodes([c]);
313 } 301 }
314 if (!supplementaryCharacterSeen) return result; 302 return result;
315 // If we saw some non-basic plane characters, then we have to return a
316 // slightly smaller array than expected (we can't trim the original one
317 // because it is non-extendable). This rarely happens so this is preferable
318 // to having a separate pass over the string to count the code points.
319 return result.getRange(0, j);
320 } 303 }
321 304
322 List<int> get codeUnits { 305 List<int> get charCodes {
323 int len = this.length; 306 int len = this.length;
324 final result = new List<int>(len); 307 final result = new List<int>(len);
325 for (int i = 0; i < len; i++) { 308 for (int i = 0; i < len; i++) {
326 result[i] = this.codeUnitAt(i); 309 result[i] = this.charCodeAt(i);
327 } 310 }
328 return result; 311 return result;
329 } 312 }
330 313
331 String toUpperCase() native "String_toUpperCase"; 314 String toUpperCase() native "String_toUpperCase";
332 315
333 String toLowerCase() native "String_toLowerCase"; 316 String toLowerCase() native "String_toLowerCase";
334 317
335 // Implementations of Strings methods follow below. 318 // Implementations of Strings methods follow below.
336 static String join(List<String> strings, String separator) { 319 static String join(List<String> strings, String separator) {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
370 native "Strings_concatAll"; 353 native "Strings_concatAll";
371 } 354 }
372 355
373 356
374 class _OneByteString extends _StringBase implements String { 357 class _OneByteString extends _StringBase implements String {
375 factory _OneByteString._uninstantiable() { 358 factory _OneByteString._uninstantiable() {
376 throw new UnsupportedError( 359 throw new UnsupportedError(
377 "_OneByteString can only be allocated by the VM"); 360 "_OneByteString can only be allocated by the VM");
378 } 361 }
379 362
363 // Checks for one-byte whitespaces only.
364 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
365 // whitespaces for one byte strings.
380 bool _isWhitespace(int codePoint) { 366 bool _isWhitespace(int codePoint) {
381 return 367 return
382 (codePoint == 32) || // Space. 368 (codePoint == 32) || // Space.
383 (codePoint == 0xa0) || // No-break space.
384 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. 369 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
385 } 370 }
386 371
387 int charCodeAt(int index) => codeUnitAt(index);
388
389 List<int> get charCodes => codeUnits;
390 } 372 }
391 373
392 374
393 class _TwoByteStringBase extends _StringBase { 375 class _TwoByteString extends _StringBase implements String {
394 factory _TwoByteStringBase._uninstantiable() { 376 factory _TwoByteString._uninstantiable() {
395 throw new UnsupportedError( 377 throw new UnsupportedError(
396 "_TwoByteStringBase can't be instaniated"); 378 "_TwoByteString can only be allocated by the VM");
397 } 379 }
398 380
399 // Works for both code points and code units since all spaces are in the BMP. 381 // Checks for one-byte whitespaces only.
382 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
383 // whitespaces. Add checking for multi-byte whitespace codepoints.
400 bool _isWhitespace(int codePoint) { 384 bool _isWhitespace(int codePoint) {
401 return 385 return
402 (codePoint == 32) || // Space. 386 (codePoint == 32) || // Space.
403 (codePoint == 0xa0) || // No-break space. 387 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
404 ((9 <= codePoint) && (codePoint <= 13)) || // CR, LF, TAB, etc.
405 (codePoint >= 0x1680 && // Optimization.
406 (codePoint == 0x1680 || // Ogham space mark.
407 codePoint == 0x180e || // Mongolian vowel separator.
408 (codePoint >= 0x2000 && codePoint <= 0x200a) || // Wide/narrow spaces.
409 codePoint == 0x2028 || // Line separator.
410 codePoint == 0x2029 || // Paragraph separator.
411 codePoint == 0x202f || // Narrow no-break space.
412 codePoint == 0x205f || // Medium mathematical space.
413 codePoint == 0x3000 || // Ideographic space.
414 codePoint == 0xfeff)); // BOM code.
415 }
416
417 int charCodeAt(int index) {
418 const int LEAD_SURROGATE_BASE = 0xd800;
419 const int LEAD_SURROGATE_END = 0xdbff;
420 const int TRAIL_SURROGATE_BASE = 0xdc00;
421 const int TRAIL_SURROGATE_END = 0xdfff;
422 const int MASK = 0x3ff;
423 int code = codeUnitAt(index);
424 if (code < LEAD_SURROGATE_BASE || code > LEAD_SURROGATE_END) return code;
425 if (index + 1 >= length) return code;
426 int trail = codeUnitAt(index + 1);
427 if (trail < TRAIL_SURROGATE_BASE || trail > TRAIL_SURROGATE_END) {
428 return code;
429 }
430 return String.SUPPLEMENTARY_CODE_POINT_BASE +
431 ((code & MASK) << 10) + (trail & MASK);
432 }
433
434 // TODO(erikcorry): Fix this to use the new code point iterator when it is
435 // available.
436 List<int> get charCodes {
437 int len = this.length;
438 final result = new List<int>(len);
439 bool supplementaryCharacterSeen = false;
440 int i, j;
441 for (i = j = 0; i < len; i++, j++) {
442 int c = this.charCodeAt(i);
443 // Check for supplementary plane character encoded as a UTF-16 surrogate
444 // pair.
445 if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) {
446 i++;
447 supplementaryCharacterSeen = true;
448 }
449 result[j] = c;
450 }
451 if (!supplementaryCharacterSeen) return result;
452 // If we saw some non-basic plane characters, then we have to return a
453 // slightly smaller array than expected (we can't trim the original one
454 // because it is non-extendable). This rarely happens so this is preferable
455 // to having a separate pass over the string to count the code points.
456 return result.getRange(0, j);
457 } 388 }
458 } 389 }
459 390
460 391
461 class _TwoByteString extends _TwoByteStringBase implements String { 392 class _FourByteString extends _StringBase implements String {
462 factory _TwoByteString._uninstantiable() { 393 factory _FourByteString._uninstantiable() {
463 throw new UnsupportedError( 394 throw new UnsupportedError(
464 "_TwoByteString can only be allocated by the VM"); 395 "_FourByteString can only be allocated by the VM");
396 }
397
398 // Checks for one-byte whitespaces only.
399 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
400 // whitespaces. Add checking for multi-byte whitespace codepoints.
401 bool _isWhitespace(int codePoint) {
402 return
403 (codePoint == 32) || // Space.
404 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
465 } 405 }
466 } 406 }
467 407
468 408
469 class _ExternalOneByteString extends _StringBase implements String { 409 class _ExternalOneByteString extends _StringBase implements String {
470 factory _ExternalOneByteString._uninstantiable() { 410 factory _ExternalOneByteString._uninstantiable() {
471 throw new UnsupportedError( 411 throw new UnsupportedError(
472 "_ExternalOneByteString can only be allocated by the VM"); 412 "_ExternalOneByteString can only be allocated by the VM");
473 } 413 }
474 414
415 // Checks for one-byte whitespaces only.
416 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
417 // whitespaces for one byte strings.
475 bool _isWhitespace(int codePoint) { 418 bool _isWhitespace(int codePoint) {
476 return 419 return
477 (codePoint == 32) || // Space. 420 (codePoint == 32) || // Space.
478 (codePoint == 0xa0) || // No-break space.
479 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc. 421 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
480 } 422 }
481
482 int charCodeAt(int index) => codeUnitAt(index);
483
484 List<int> get charCodes => codeUnits;
485 } 423 }
486 424
487 425
488 class _ExternalTwoByteString extends _TwoByteStringBase implements String { 426 class _ExternalTwoByteString extends _StringBase implements String {
489 factory _ExternalTwoByteString._uninstantiable() { 427 factory _ExternalTwoByteString._uninstantiable() {
490 throw new UnsupportedError( 428 throw new UnsupportedError(
491 "_ExternalTwoByteString can only be allocated by the VM"); 429 "_ExternalTwoByteString can only be allocated by the VM");
492 } 430 }
431
432 // Checks for one-byte whitespaces only.
433 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
434 // whitespaces. Add checking for multi-byte whitespace codepoints.
435 bool _isWhitespace(int codePoint) {
436 return
437 (codePoint == 32) || // Space.
438 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
439 }
493 } 440 }
494 441
495 442
443 class _ExternalFourByteString extends _StringBase implements String {
444 factory _ExternalFourByteString._uninstantiable() {
445 throw new UnsupportedError(
446 "ExternalFourByteString can only be allocated by the VM");
447 }
448
449 // Checks for one-byte whitespaces only.
450 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid
451 // whitespaces. Add checking for multi-byte whitespace codepoints.
452 bool _isWhitespace(int codePoint) {
453 return
454 (codePoint == 32) || // Space.
455 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.
456 }
457 }
458
459
496 class _StringMatch implements Match { 460 class _StringMatch implements Match {
497 const _StringMatch(int this.start, 461 const _StringMatch(int this.start,
498 String this.str, 462 String this.str,
499 String this.pattern); 463 String this.pattern);
500 464
501 int get end => start + pattern.length; 465 int get end => start + pattern.length;
502 String operator[](int g) => group(g); 466 String operator[](int g) => group(g);
503 int get groupCount => 0; 467 int get groupCount => 0;
504 468
505 String group(int group) { 469 String group(int group) {
506 if (group != 0) { 470 if (group != 0) {
507 throw new RangeError.value(group); 471 throw new RangeError.value(group);
508 } 472 }
509 return pattern; 473 return pattern;
510 } 474 }
511 475
512 List<String> groups(List<int> groups) { 476 List<String> groups(List<int> groups) {
513 List<String> result = new List<String>(); 477 List<String> result = new List<String>();
514 for (int g in groups) { 478 for (int g in groups) {
515 result.add(group(g)); 479 result.add(group(g));
516 } 480 }
517 return result; 481 return result;
518 } 482 }
519 483
520 final int start; 484 final int start;
521 final String str; 485 final String str;
522 final String pattern; 486 final String pattern;
523 } 487 }
OLD | NEW
« no previous file with comments | « runtime/lib/string.cc ('k') | runtime/lib/string_patch.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698