runtime/lib/string_base.dart - Issue 11411092: Revert "Add some support for the code-point code-unit distinction."

Side by Side Diff: runtime/lib/string_base.dart

Issue 11411092: Revert "Add some support for the code-point code-unit distinction." (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 /**	5 /**

6 * [_StringBase] contains common methods used by concrete String	6 * [_StringBase] contains common methods used by concrete String

7 * implementations, e.g., _OneByteString.	7 * implementations, e.g., _OneByteString.

8 */	8 */

9 class _StringBase {	9 class _StringBase {

10	10

11 factory _StringBase._uninstantiable() {	11 factory _StringBase._uninstantiable() {

12 throw new UnsupportedError(	12 throw new UnsupportedError(

13 "_StringBase can't be instaniated");	13 "_StringBase can't be instaniated");

14 }	14 }

15	15

16 int get hashCode native "String_getHashCode";	16 int get hashCode native "String_getHashCode";

17	17

18 /**	18 /**

19 * Create the most efficient string representation for the specified UTF-16	19 * Create the most efficient string representation for specified

20 * [codeUnits].	20 * [codePoints].

21 */	21 */

22 static String createFromUtf16(List<int> codeUnits) {	22 static String createFromCharCodes(List<int> charCodes) {

23 _ObjectArray objectArray;	23 _ObjectArray objectArray;

24 if (codeUnits is _ObjectArray) {	24 if (charCodes is _ObjectArray) {

25 objectArray = codeUnits;	25 objectArray = charCodes;

26 } else {	26 } else {

27 int len = codeUnits.length;	27 int len = charCodes.length;

28 objectArray = new _ObjectArray(len);	28 objectArray = new _ObjectArray(len);

29 for (int i = 0; i < len; i++) {	29 for (int i = 0; i < len; i++) {

30 objectArray[i] = codeUnits[i];	30 objectArray[i] = charCodes[i];

31 }	31 }

32 }	32 }

33 return _createFromUtf16(objectArray);	33 return _createFromCodePoints(objectArray);

34 }	34 }

35	35

36 static String _createFromUtf16(List<int> codeUnits)	36 static String _createFromCodePoints(List<int> codePoints)

37 native "StringBase_createFromUtf16";	37 native "StringBase_createFromCodePoints";

38	38

39 String operator [](int index) native "String_charAt";	39 String operator [](int index) native "String_charAt";

40	40

41 int codeUnitAt(int index) native "String_codeUnitAt";	41 int charCodeAt(int index) native "String_charCodeAt";

42	42

43 int get length native "String_getLength";	43 int get length native "String_getLength";

44	44

45 bool get isEmpty {	45 bool get isEmpty {

46 return this.length == 0;	46 return this.length == 0;

47 }	47 }

48	48

49 String concat(String other) native "String_concat";	49 String concat(String other) native "String_concat";

50	50

51 String toString() {	51 String toString() {

(...skipping 10 matching lines...) Expand all Loading...
62 return false;	62 return false;

63 }	63 }

64 return this.compareTo(other) == 0;	64 return this.compareTo(other) == 0;

65 }	65 }

66	66

67 int compareTo(String other) {	67 int compareTo(String other) {

68 int thisLength = this.length;	68 int thisLength = this.length;

69 int otherLength = other.length;	69 int otherLength = other.length;

70 int len = (thisLength < otherLength) ? thisLength : otherLength;	70 int len = (thisLength < otherLength) ? thisLength : otherLength;

71 for (int i = 0; i < len; i++) {	71 for (int i = 0; i < len; i++) {

72 int thisCodeUnit = this.codeUnitAt(i);	72 int thisCodePoint = this.charCodeAt(i);

73 int otherCodeUnit = other.codeUnitAt(i);	73 int otherCodePoint = other.charCodeAt(i);

74 if (thisCodeUnit < otherCodeUnit) {	74 if (thisCodePoint < otherCodePoint) {

75 return -1;	75 return -1;

76 }	76 }

77 if (thisCodeUnit > otherCodeUnit) {	77 if (thisCodePoint > otherCodePoint) {

78 return 1;	78 return 1;

79 }	79 }

80 }	80 }

81 if (thisLength < otherLength) return -1;	81 if (thisLength < otherLength) return -1;

82 if (thisLength > otherLength) return 1;	82 if (thisLength > otherLength) return 1;

83 return 0;	83 return 0;

84 }	84 }

85	85

86 bool _substringMatches(int start, String other) {	86 bool _substringMatches(int start, String other) {

87 if (other.isEmpty) return true;	87 if (other.isEmpty) return true;

88 if ((start < 0) || (start >= this.length)) {	88 if ((start < 0) || (start >= this.length)) {

89 return false;	89 return false;

90 }	90 }

91 final int len = other.length;	91 final int len = other.length;

92 if ((start + len) > this.length) {	92 if ((start + len) > this.length) {

93 return false;	93 return false;

94 }	94 }

95 for (int i = 0; i < len; i++) {	95 for (int i = 0; i < len; i++) {

96 if (this.codeUnitAt(i + start) != other.codeUnitAt(i)) {	96 if (this.charCodeAt(i + start) != other.charCodeAt(i)) {

97 return false;	97 return false;

98 }	98 }

99 }	99 }

100 return true;	100 return true;

101 }	101 }

102	102

103 bool endsWith(String other) {	103 bool endsWith(String other) {

104 return _substringMatches(this.length - other.length, other);	104 return _substringMatches(this.length - other.length, other);

105 }	105 }

106	106

(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
155 return _substringUnchecked(startIndex, endIndex);	155 return _substringUnchecked(startIndex, endIndex);

156 }	156 }

157	157

158 String _substringUnchecked(int startIndex, int endIndex)	158 String _substringUnchecked(int startIndex, int endIndex)

159 native "StringBase_substringUnchecked";	159 native "StringBase_substringUnchecked";

160	160

161 String trim() {	161 String trim() {

162 final int len = this.length;	162 final int len = this.length;

163 int first = 0;	163 int first = 0;

164 for (; first < len; first++) {	164 for (; first < len; first++) {

165 // There are no whitespace characters that are outside the BMP so we	165 if (!_isWhitespace(this.charCodeAt(first))) {

166 // can use code units here for efficiency.

167 if (!_isWhitespace(this.codeUnitAt(first))) {

168 break;	166 break;

169 }	167 }

170 }	168 }

171 if (len == first) {	169 if (len == first) {

172 // String contains only whitespaces.	170 // String contains only whitespaces.

173 return "";	171 return "";

174 }	172 }

175 int last = len - 1;	173 int last = len - 1;

176 for (; last >= first; last--) {	174 for (; last >= first; last--) {

177 if (!_isWhitespace(this.codeUnitAt(last))) {	175 if (!_isWhitespace(this.charCodeAt(last))) {

178 break;	176 break;

179 }	177 }

180 }	178 }

181 if ((first == 0) && (last == (len - 1))) {	179 if ((first == 0) && (last == (len - 1))) {

182 // Returns this string if it does not have leading or trailing	180 // Returns this string if it does not have leading or trailing

183 // whitespaces.	181 // whitespaces.

184 return this;	182 return this;

185 } else {	183 } else {

186 return _substringUnchecked(first, last + 1);	184 return _substringUnchecked(first, last + 1);

187 }	185 }

(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
288 if (startIndex == endIndex && endIndex == previousIndex) {	286 if (startIndex == endIndex && endIndex == previousIndex) {

289 ++startIndex; // empty match, advance and restart	287 ++startIndex; // empty match, advance and restart

290 continue;	288 continue;

291 }	289 }

292 result.add(this.substring(previousIndex, match.start));	290 result.add(this.substring(previousIndex, match.start));

293 startIndex = previousIndex = endIndex;	291 startIndex = previousIndex = endIndex;

294 }	292 }

295 return result;	293 return result;

296 }	294 }

297	295

298 // TODO(erikcorry): Fix this to use the new code point iterator when it is

299 // available.

300 List<String> splitChars() {	296 List<String> splitChars() {

301 int len = this.length;	297 int len = this.length;

302 final result = new List<String>(len);	298 final result = new List<String>(len);

303 bool supplementaryCharacterSeen = false;	299 for (int i = 0; i < len; i++) {

304 int i, j;	300 result[i] = this[i];

305 for (i = j = 0; i < len; i++, j++) {

306 int c = charCodeAt(i);

307 // Check for non-basic plane character encoded as a UTF-16 surrogate pair.

308 if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) {

309 i++;

310 supplementaryCharacterSeen = true;

311 }

312 result[j] = new String.fromCharCodes([c]);

313 }	301 }

314 if (!supplementaryCharacterSeen) return result;	302 return result;

315 // If we saw some non-basic plane characters, then we have to return a

316 // slightly smaller array than expected (we can't trim the original one

317 // because it is non-extendable). This rarely happens so this is preferable

318 // to having a separate pass over the string to count the code points.

319 return result.getRange(0, j);

320 }	303 }

321	304

322 List<int> get codeUnits {	305 List<int> get charCodes {

323 int len = this.length;	306 int len = this.length;

324 final result = new List<int>(len);	307 final result = new List<int>(len);

325 for (int i = 0; i < len; i++) {	308 for (int i = 0; i < len; i++) {

326 result[i] = this.codeUnitAt(i);	309 result[i] = this.charCodeAt(i);

327 }	310 }

328 return result;	311 return result;

329 }	312 }

330	313

331 String toUpperCase() native "String_toUpperCase";	314 String toUpperCase() native "String_toUpperCase";

332	315

333 String toLowerCase() native "String_toLowerCase";	316 String toLowerCase() native "String_toLowerCase";

334	317

335 // Implementations of Strings methods follow below.	318 // Implementations of Strings methods follow below.

336 static String join(List<String> strings, String separator) {	319 static String join(List<String> strings, String separator) {

(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
370 native "Strings_concatAll";	353 native "Strings_concatAll";

371 }	354 }

372	355

373	356

374 class _OneByteString extends _StringBase implements String {	357 class _OneByteString extends _StringBase implements String {

375 factory _OneByteString._uninstantiable() {	358 factory _OneByteString._uninstantiable() {

376 throw new UnsupportedError(	359 throw new UnsupportedError(

377 "_OneByteString can only be allocated by the VM");	360 "_OneByteString can only be allocated by the VM");

378 }	361 }

379	362

	363 // Checks for one-byte whitespaces only.

	364 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

	365 // whitespaces for one byte strings.

380 bool _isWhitespace(int codePoint) {	366 bool _isWhitespace(int codePoint) {

381 return	367 return

382 (codePoint == 32) || // Space.	368 (codePoint == 32) || // Space.

383 (codePoint == 0xa0) || // No-break space.

384 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.	369 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

385 }	370 }

386	371

387 int charCodeAt(int index) => codeUnitAt(index);

388

389 List<int> get charCodes => codeUnits;

390 }	372 }

391	373

392	374

393 class _TwoByteStringBase extends _StringBase {	375 class _TwoByteString extends _StringBase implements String {

394 factory _TwoByteStringBase._uninstantiable() {	376 factory _TwoByteString._uninstantiable() {

395 throw new UnsupportedError(	377 throw new UnsupportedError(

396 "_TwoByteStringBase can't be instaniated");	378 "_TwoByteString can only be allocated by the VM");

397 }	379 }

398	380

399 // Works for both code points and code units since all spaces are in the BMP.	381 // Checks for one-byte whitespaces only.

	382 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

	383 // whitespaces. Add checking for multi-byte whitespace codepoints.

400 bool _isWhitespace(int codePoint) {	384 bool _isWhitespace(int codePoint) {

401 return	385 return

402 (codePoint == 32) || // Space.	386 (codePoint == 32) || // Space.

403 (codePoint == 0xa0) || // No-break space.	387 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

404 ((9 <= codePoint) && (codePoint <= 13)) || // CR, LF, TAB, etc.

405 (codePoint >= 0x1680 && // Optimization.

406 (codePoint == 0x1680 || // Ogham space mark.

407 codePoint == 0x180e || // Mongolian vowel separator.

408 (codePoint >= 0x2000 && codePoint <= 0x200a) || // Wide/narrow spaces.

409 codePoint == 0x2028 || // Line separator.

410 codePoint == 0x2029 || // Paragraph separator.

411 codePoint == 0x202f || // Narrow no-break space.

412 codePoint == 0x205f || // Medium mathematical space.

413 codePoint == 0x3000 || // Ideographic space.

414 codePoint == 0xfeff)); // BOM code.

415 }

416

417 int charCodeAt(int index) {

418 const int LEAD_SURROGATE_BASE = 0xd800;

419 const int LEAD_SURROGATE_END = 0xdbff;

420 const int TRAIL_SURROGATE_BASE = 0xdc00;

421 const int TRAIL_SURROGATE_END = 0xdfff;

422 const int MASK = 0x3ff;

423 int code = codeUnitAt(index);

424 if (code < LEAD_SURROGATE_BASE || code > LEAD_SURROGATE_END) return code;

425 if (index + 1 >= length) return code;

426 int trail = codeUnitAt(index + 1);

427 if (trail < TRAIL_SURROGATE_BASE || trail > TRAIL_SURROGATE_END) {

428 return code;

429 }

430 return String.SUPPLEMENTARY_CODE_POINT_BASE +

431 ((code & MASK) << 10) + (trail & MASK);

432 }

433

434 // TODO(erikcorry): Fix this to use the new code point iterator when it is

435 // available.

436 List<int> get charCodes {

437 int len = this.length;

438 final result = new List<int>(len);

439 bool supplementaryCharacterSeen = false;

440 int i, j;

441 for (i = j = 0; i < len; i++, j++) {

442 int c = this.charCodeAt(i);

443 // Check for supplementary plane character encoded as a UTF-16 surrogate

444 // pair.

445 if (c >= String.SUPPLEMENTARY_CODE_POINT_BASE) {

446 i++;

447 supplementaryCharacterSeen = true;

448 }

449 result[j] = c;

450 }

451 if (!supplementaryCharacterSeen) return result;

452 // If we saw some non-basic plane characters, then we have to return a

453 // slightly smaller array than expected (we can't trim the original one

454 // because it is non-extendable). This rarely happens so this is preferable

455 // to having a separate pass over the string to count the code points.

456 return result.getRange(0, j);

457 }	388 }

458 }	389 }

459	390

460	391

461 class _TwoByteString extends _TwoByteStringBase implements String {	392 class _FourByteString extends _StringBase implements String {

462 factory _TwoByteString._uninstantiable() {	393 factory _FourByteString._uninstantiable() {

463 throw new UnsupportedError(	394 throw new UnsupportedError(

464 "_TwoByteString can only be allocated by the VM");	395 "_FourByteString can only be allocated by the VM");

	396 }

	397

	398 // Checks for one-byte whitespaces only.

	399 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

	400 // whitespaces. Add checking for multi-byte whitespace codepoints.

	401 bool _isWhitespace(int codePoint) {

	402 return

	403 (codePoint == 32) || // Space.

	404 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

465 }	405 }

466 }	406 }

467	407

468	408

469 class _ExternalOneByteString extends _StringBase implements String {	409 class _ExternalOneByteString extends _StringBase implements String {

470 factory _ExternalOneByteString._uninstantiable() {	410 factory _ExternalOneByteString._uninstantiable() {

471 throw new UnsupportedError(	411 throw new UnsupportedError(

472 "_ExternalOneByteString can only be allocated by the VM");	412 "_ExternalOneByteString can only be allocated by the VM");

473 }	413 }

474	414

	415 // Checks for one-byte whitespaces only.

	416 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

	417 // whitespaces for one byte strings.

475 bool _isWhitespace(int codePoint) {	418 bool _isWhitespace(int codePoint) {

476 return	419 return

477 (codePoint == 32) || // Space.	420 (codePoint == 32) || // Space.

478 (codePoint == 0xa0) || // No-break space.

479 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.	421 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

480 }	422 }

481

482 int charCodeAt(int index) => codeUnitAt(index);

483

484 List<int> get charCodes => codeUnits;

485 }	423 }

486	424

487	425

488 class _ExternalTwoByteString extends _TwoByteStringBase implements String {	426 class _ExternalTwoByteString extends _StringBase implements String {

489 factory _ExternalTwoByteString._uninstantiable() {	427 factory _ExternalTwoByteString._uninstantiable() {

490 throw new UnsupportedError(	428 throw new UnsupportedError(

491 "_ExternalTwoByteString can only be allocated by the VM");	429 "_ExternalTwoByteString can only be allocated by the VM");

492 }	430 }

	431

	432 // Checks for one-byte whitespaces only.

	433 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

	434 // whitespaces. Add checking for multi-byte whitespace codepoints.

	435 bool _isWhitespace(int codePoint) {

	436 return

	437 (codePoint == 32) || // Space.

	438 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

	439 }

493 }	440 }

494	441

495	442

	443 class _ExternalFourByteString extends _StringBase implements String {

	444 factory _ExternalFourByteString._uninstantiable() {

	445 throw new UnsupportedError(

	446 "ExternalFourByteString can only be allocated by the VM");

	447 }

	448

	449 // Checks for one-byte whitespaces only.

	450 // TODO(srdjan): Investigate if 0x85 (NEL) and 0xA0 (NBSP) are valid

	451 // whitespaces. Add checking for multi-byte whitespace codepoints.

	452 bool _isWhitespace(int codePoint) {

	453 return

	454 (codePoint == 32) || // Space.

	455 ((9 <= codePoint) && (codePoint <= 13)); // CR, LF, TAB, etc.

	456 }

	457 }

	458

	459

496 class _StringMatch implements Match {	460 class _StringMatch implements Match {

497 const _StringMatch(int this.start,	461 const _StringMatch(int this.start,

498 String this.str,	462 String this.str,

499 String this.pattern);	463 String this.pattern);

500	464

501 int get end => start + pattern.length;	465 int get end => start + pattern.length;

502 String operator[](int g) => group(g);	466 String operator[](int g) => group(g);

503 int get groupCount => 0;	467 int get groupCount => 0;

504	468

505 String group(int group) {	469 String group(int group) {

506 if (group != 0) {	470 if (group != 0) {

507 throw new RangeError.value(group);	471 throw new RangeError.value(group);

508 }	472 }

509 return pattern;	473 return pattern;

510 }	474 }

511	475

512 List<String> groups(List<int> groups) {	476 List<String> groups(List<int> groups) {

513 List<String> result = new List<String>();	477 List<String> result = new List<String>();

514 for (int g in groups) {	478 for (int g in groups) {

515 result.add(group(g));	479 result.add(group(g));

516 }	480 }

517 return result;	481 return result;

518 }	482 }

519	483

520 final int start;	484 final int start;

521 final String str;	485 final String str;

522 final String pattern;	486 final String pattern;

523 }	487 }

OLD	NEW

« no previous file with comments | « runtime/lib/string.cc ('k') | runtime/lib/string_patch.dart » ('j') | no next file with comments »