sdk/lib/convert/utf.dart - Issue 2754013002: Format all dart: library files

Side by Side Diff: sdk/lib/convert/utf.dart

Issue 2754013002: Format all dart: library files (Closed)

Patch Set: Format all dart: library files Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of dart.convert;	5 part of dart.convert;

6	6

7 /** The Unicode Replacement character `U+FFFD` (�). */	7 /** The Unicode Replacement character `U+FFFD` (�). */

8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD;	8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD;

9	9

10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */	10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */

(...skipping 24 matching lines...) Expand all Loading...
35 * Instantiates a new [Utf8Codec].	35 * Instantiates a new [Utf8Codec].

36 *	36 *

37 * The optional [allowMalformed] argument defines how [decoder] (and [decode])	37 * The optional [allowMalformed] argument defines how [decoder] (and [decode])

38 * deal with invalid or unterminated character sequences.	38 * deal with invalid or unterminated character sequences.

39 *	39 *

40 * If it is `true` (and not overridden at the method invocation) [decode] and	40 * If it is `true` (and not overridden at the method invocation) [decode] and

41 * the [decoder] replace invalid (or unterminated) octet	41 * the [decoder] replace invalid (or unterminated) octet

42 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise	42 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise

43 * they throw a [FormatException].	43 * they throw a [FormatException].

44 */	44 */

45 const Utf8Codec({ bool allowMalformed: false })	45 const Utf8Codec({bool allowMalformed: false})

46 : _allowMalformed = allowMalformed;	46 : _allowMalformed = allowMalformed;

47	47

48 String get name => "utf-8";	48 String get name => "utf-8";

49	49

50 /**	50 /**

51 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the	51 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the

52 * corresponding string.	52 * corresponding string.

53 *	53 *

54 * If the [codeUnits] start with the encoding of a	54 * If the [codeUnits] start with the encoding of a

55 * [UNICODE_BOM_CHARACTER_RUNE], that character is discarded.	55 * [UNICODE_BOM_CHARACTER_RUNE], that character is discarded.

56 *	56 *

57 * If [allowMalformed] is `true` the decoder replaces invalid (or	57 * If [allowMalformed] is `true` the decoder replaces invalid (or

58 * unterminated) character sequences with the Unicode Replacement character	58 * unterminated) character sequences with the Unicode Replacement character

59 * `U+FFFD` (�). Otherwise it throws a [FormatException].	59 * `U+FFFD` (�). Otherwise it throws a [FormatException].

60 *	60 *

61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that	61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that

62 * was used to instantiate `this`.	62 * was used to instantiate `this`.

63 */	63 */

64 String decode(List<int> codeUnits, { bool allowMalformed }) {	64 String decode(List<int> codeUnits, {bool allowMalformed}) {

65 if (allowMalformed == null) allowMalformed = _allowMalformed;	65 if (allowMalformed == null) allowMalformed = _allowMalformed;

66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits);	66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits);

67 }	67 }

68	68

69 Utf8Encoder get encoder => const Utf8Encoder();	69 Utf8Encoder get encoder => const Utf8Encoder();

70 Utf8Decoder get decoder {	70 Utf8Decoder get decoder {

71 return new Utf8Decoder(allowMalformed: _allowMalformed);	71 return new Utf8Decoder(allowMalformed: _allowMalformed);

72 }	72 }

73 }	73 }

74	74

75 /**	75 /**

76 * This class converts strings to their UTF-8 code units (a list of	76 * This class converts strings to their UTF-8 code units (a list of

77 * unsigned 8-bit integers).	77 * unsigned 8-bit integers).

78 */	78 */

79 class Utf8Encoder extends Converter<String, List<int>>	79 class Utf8Encoder extends Converter<String, List<int>>

80 implements ChunkedConverter<String, List<int>, String, List<int>> {	80 implements ChunkedConverter<String, List<int>, String, List<int>> {

81

82 const Utf8Encoder();	81 const Utf8Encoder();

83	82

84 /**	83 /**

85 * Converts [string] to its UTF-8 code units (a list of	84 * Converts [string] to its UTF-8 code units (a list of

86 * unsigned 8-bit integers).	85 * unsigned 8-bit integers).

87 *	86 *

88 * If [start] and [end] are provided, only the substring	87 * If [start] and [end] are provided, only the substring

89 * `string.substring(start, end)` is converted.	88 * `string.substring(start, end)` is converted.

90 */	89 */

91 List<int> convert(String string, [int start = 0, int end]) {	90 List<int> convert(String string, [int start = 0, int end]) {

(...skipping 141 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
233 }	232 }

234 return stringIndex;	233 return stringIndex;

235 }	234 }

236 }	235 }

237	236

238 /**	237 /**

239 * This class encodes chunked strings to UTF-8 code units (unsigned 8-bit	238 * This class encodes chunked strings to UTF-8 code units (unsigned 8-bit

240 * integers).	239 * integers).

241 */	240 */

242 class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin {	241 class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin {

243

244 final ByteConversionSink _sink;	242 final ByteConversionSink _sink;

245	243

246 _Utf8EncoderSink(this._sink);	244 _Utf8EncoderSink(this._sink);

247	245

248 void close() {	246 void close() {

249 if (_carry != 0) {	247 if (_carry != 0) {

250 // addSlice will call close again, but then the carry must be equal to 0.	248 // addSlice will call close again, but then the carry must be equal to 0.

251 addSlice("", 0, 0, true);	249 addSlice("", 0, 0, true);

252 return;	250 return;

253 }	251 }

254 _sink.close();	252 _sink.close();

255 }	253 }

256	254

257 void addSlice(String str, int start, int end, bool isLast) {	255 void addSlice(String str, int start, int end, bool isLast) {

258 _bufferIndex = 0;	256 _bufferIndex = 0;

259	257

260 if (start == end && !isLast) {	258 if (start == end && !isLast) {

261 return;	259 return;

262 }	260 }

263	261

264 if (_carry != 0) {	262 if (_carry != 0) {

265 int nextCodeUnit = 0;	263 int nextCodeUnit = 0;

266 if (start != end) {	264 if (start != end) {

267 nextCodeUnit = str.codeUnitAt(start);	265 nextCodeUnit = str.codeUnitAt(start);

268 } else {	266 } else {

269 assert(isLast);	267 assert(isLast);

270 }	268 }

271 bool wasCombined = _writeSurrogate(_carry, nextCodeUnit);	269 bool wasCombined = _writeSurrogate(_carry, nextCodeUnit);

272 // Either we got a non-empty string, or we must not have been combined.	270 // Either we got a non-empty string, or we must not have been combined.

273 assert(!wasCombined || start != end );	271 assert(!wasCombined || start != end);

274 if (wasCombined) start++;	272 if (wasCombined) start++;

275 _carry = 0;	273 _carry = 0;

276 }	274 }

277 do {	275 do {

278 start = _fillBuffer(str, start, end);	276 start = _fillBuffer(str, start, end);

279 bool isLastSlice = isLast && (start == end);	277 bool isLastSlice = isLast && (start == end);

280 if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) {	278 if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) {

281 if (isLast && _bufferIndex < _buffer.length - 3) {	279 if (isLast && _bufferIndex < _buffer.length - 3) {

282 // There is still space for the last incomplete surrogate.	280 // There is still space for the last incomplete surrogate.

283 // We use a non-surrogate as second argument. This way the	281 // We use a non-surrogate as second argument. This way the

(...skipping 16 matching lines...) Expand all Loading...
300 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it	298 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it

301 // needs to deal with malformed input.	299 // needs to deal with malformed input.

302 }	300 }

303	301

304 /**	302 /**

305 * This class converts UTF-8 code units (lists of unsigned 8-bit integers)	303 * This class converts UTF-8 code units (lists of unsigned 8-bit integers)

306 * to a string.	304 * to a string.

307 */	305 */

308 class Utf8Decoder extends Converter<List<int>, String>	306 class Utf8Decoder extends Converter<List<int>, String>

309 implements ChunkedConverter<List<int>, String, List<int>, String> {	307 implements ChunkedConverter<List<int>, String, List<int>, String> {

310

311 final bool _allowMalformed;	308 final bool _allowMalformed;

312	309

313 /**	310 /**

314 * Instantiates a new [Utf8Decoder].	311 * Instantiates a new [Utf8Decoder].

315 *	312 *

316 * The optional [allowMalformed] argument defines how [convert] deals	313 * The optional [allowMalformed] argument defines how [convert] deals

317 * with invalid or unterminated character sequences.	314 * with invalid or unterminated character sequences.

318 *	315 *

319 * If it is `true` [convert] replaces invalid (or unterminated) character	316 * If it is `true` [convert] replaces invalid (or unterminated) character

320 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise	317 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise

321 * it throws a [FormatException].	318 * it throws a [FormatException].

322 */	319 */

323 const Utf8Decoder({ bool allowMalformed: false })	320 const Utf8Decoder({bool allowMalformed: false})

324 : this._allowMalformed = allowMalformed;	321 : this._allowMalformed = allowMalformed;

325	322

326 /**	323 /**

327 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the	324 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the

328 * corresponding string.	325 * corresponding string.

329 *	326 *

330 * Uses the code units from [start] to, but not including, [end].	327 * Uses the code units from [start] to, but not including, [end].

331 * If [end] is omitted, it defaults to `codeUnits.length`.	328 * If [end] is omitted, it defaults to `codeUnits.length`.

332 *	329 *

333 * If the [codeUnits] start with the encoding of a	330 * If the [codeUnits] start with the encoding of a

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
370 // Override the base-classes bind, to provide a better type.	367 // Override the base-classes bind, to provide a better type.

371 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);	368 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);

372	369

373 external Converter<List<int>, T> fuse<T>(Converter<String, T> next);	370 external Converter<List<int>, T> fuse<T>(Converter<String, T> next);

374	371

375 external static String _convertIntercepted(	372 external static String _convertIntercepted(

376 bool allowMalformed, List<int> codeUnits, int start, int end);	373 bool allowMalformed, List<int> codeUnits, int start, int end);

377 }	374 }

378	375

379 // UTF-8 constants.	376 // UTF-8 constants.

380 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits	377 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits

381 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits	378 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits

382 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits	379 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits

383 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max.	380 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max.

384	381

385 // UTF-16 constants.	382 // UTF-16 constants.

386 const int _SURROGATE_MASK = 0xF800;	383 const int _SURROGATE_MASK = 0xF800;

387 const int _SURROGATE_TAG_MASK = 0xFC00;	384 const int _SURROGATE_TAG_MASK = 0xFC00;

388 const int _SURROGATE_VALUE_MASK = 0x3FF;	385 const int _SURROGATE_VALUE_MASK = 0x3FF;

389 const int _LEAD_SURROGATE_MIN = 0xD800;	386 const int _LEAD_SURROGATE_MIN = 0xD800;

390 const int _TAIL_SURROGATE_MIN = 0xDC00;	387 const int _TAIL_SURROGATE_MIN = 0xDC00;

391	388

392 bool _isLeadSurrogate(int codeUnit) =>	389 bool _isLeadSurrogate(int codeUnit) =>

393 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN;	390 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN;

394 bool _isTailSurrogate(int codeUnit) =>	391 bool _isTailSurrogate(int codeUnit) =>

395 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN;	392 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN;

396 int _combineSurrogatePair(int lead, int tail) =>	393 int _combineSurrogatePair(int lead, int tail) =>

397 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10)	394 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10) |

398 | (tail & _SURROGATE_VALUE_MASK);	395 (tail & _SURROGATE_VALUE_MASK);

399	396

400 /**	397 /**

401 * Decodes UTF-8.	398 * Decodes UTF-8.

402 *	399 *

403 * The decoder handles chunked input.	400 * The decoder handles chunked input.

404 */	401 */

405 // TODO(floitsch): make this class public.	402 // TODO(floitsch): make this class public.

406 class _Utf8Decoder {	403 class _Utf8Decoder {

407 final bool _allowMalformed;	404 final bool _allowMalformed;

408 final StringSink _stringSink;	405 final StringSink _stringSink;

409 bool _isFirstCharacter = true;	406 bool _isFirstCharacter = true;

410 int _value = 0;	407 int _value = 0;

411 int _expectedUnits = 0;	408 int _expectedUnits = 0;

412 int _extraUnits = 0;	409 int _extraUnits = 0;

413	410

414 _Utf8Decoder(this._stringSink, this._allowMalformed);	411 _Utf8Decoder(this._stringSink, this._allowMalformed);

415	412

416 bool get hasPartialInput => _expectedUnits > 0;	413 bool get hasPartialInput => _expectedUnits > 0;

417	414

418 // Limits of one through four byte encodings.	415 // Limits of one through four byte encodings.

419 static const List<int> _LIMITS = const <int>[	416 static const List<int> _LIMITS = const <int>[

420 _ONE_BYTE_LIMIT,	417 _ONE_BYTE_LIMIT,

421 _TWO_BYTE_LIMIT,	418 _TWO_BYTE_LIMIT,

422 _THREE_BYTE_LIMIT,	419 _THREE_BYTE_LIMIT,

423 _FOUR_BYTE_LIMIT ];	420 _FOUR_BYTE_LIMIT

	421 ];

424	422

425 void close() {	423 void close() {

426 flush();	424 flush();

427 }	425 }

428	426

429 /**	427 /**

430 * Flushes this decoder as if closed.	428 * Flushes this decoder as if closed.

431 *	429 *

432 * This method throws if the input was partial and the decoder was	430 * This method throws if the input was partial and the decoder was

433 * constructed with `allowMalformed` set to `false`.	431 * constructed with `allowMalformed` set to `false`.

434 *	432 *

435 * The [source] and [offset] of the current position may be provided,	433 * The [source] and [offset] of the current position may be provided,

436 * and are included in the exception if one is thrown.	434 * and are included in the exception if one is thrown.

437 */	435 */

438 void flush([List<int> source, int offset]) {	436 void flush([List<int> source, int offset]) {

439 if (hasPartialInput) {	437 if (hasPartialInput) {

440 if (!_allowMalformed) {	438 if (!_allowMalformed) {

441 throw new FormatException("Unfinished UTF-8 octet sequence",	439 throw new FormatException(

442 source, offset);	440 "Unfinished UTF-8 octet sequence", source, offset);

443 }	441 }

444 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);	442 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);

445 _value = 0;	443 _value = 0;

446 _expectedUnits = 0;	444 _expectedUnits = 0;

447 _extraUnits = 0;	445 _extraUnits = 0;

448 }	446 }

449 }	447 }

450	448

451 void convert(List<int> codeUnits, int startIndex, int endIndex) {	449 void convert(List<int> codeUnits, int startIndex, int endIndex) {

452 int value = _value;	450 int value = _value;

(...skipping 13 matching lines...) Expand all Loading...
466 return to - from;	464 return to - from;

467 }	465 }

468	466

469 void addSingleBytes(int from, int to) {	467 void addSingleBytes(int from, int to) {

470 assert(from >= startIndex && from <= endIndex);	468 assert(from >= startIndex && from <= endIndex);

471 assert(to >= startIndex && to <= endIndex);	469 assert(to >= startIndex && to <= endIndex);

472 _stringSink.write(new String.fromCharCodes(codeUnits, from, to));	470 _stringSink.write(new String.fromCharCodes(codeUnits, from, to));

473 }	471 }

474	472

475 int i = startIndex;	473 int i = startIndex;

476 loop: while (true) {	474 loop:

477 multibyte: if (expectedUnits > 0) {	475 while (true) {

	476 multibyte:

	477 if (expectedUnits > 0) {

478 do {	478 do {

479 if (i == endIndex) {	479 if (i == endIndex) {

480 break loop;	480 break loop;

481 }	481 }

482 int unit = codeUnits[i];	482 int unit = codeUnits[i];

483 if ((unit & 0xC0) != 0x80) {	483 if ((unit & 0xC0) != 0x80) {

484 expectedUnits = 0;	484 expectedUnits = 0;

485 if (!_allowMalformed) {	485 if (!_allowMalformed) {

486 throw new FormatException(	486 throw new FormatException(

487 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}",	487 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}",

488 codeUnits, i);	488 codeUnits,

	489 i);

489 }	490 }

490 _isFirstCharacter = false;	491 _isFirstCharacter = false;

491 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);	492 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);

492 break multibyte;	493 break multibyte;

493 } else {	494 } else {

494 value = (value << 6) | (unit & 0x3f);	495 value = (value << 6) | (unit & 0x3f);

495 expectedUnits--;	496 expectedUnits--;

496 i++;	497 i++;

497 }	498 }

498 } while (expectedUnits > 0);	499 } while (expectedUnits > 0);

499 if (value <= _LIMITS[extraUnits - 1]) {	500 if (value <= _LIMITS[extraUnits - 1]) {

500 // Overly long encoding. The value could be encoded with a shorter	501 // Overly long encoding. The value could be encoded with a shorter

501 // encoding.	502 // encoding.

502 if (!_allowMalformed) {	503 if (!_allowMalformed) {

503 throw new FormatException(	504 throw new FormatException(

504 "Overlong encoding of 0x${value.toRadixString(16)}",	505 "Overlong encoding of 0x${value.toRadixString(16)}",

505 codeUnits, i - extraUnits - 1);	506 codeUnits,

	507 i - extraUnits - 1);

506 }	508 }

507 expectedUnits = extraUnits = 0;	509 expectedUnits = extraUnits = 0;

508 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;	510 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;

509 }	511 }

510 if (value > _FOUR_BYTE_LIMIT) {	512 if (value > _FOUR_BYTE_LIMIT) {

511 if (!_allowMalformed) {	513 if (!_allowMalformed) {

512 throw new FormatException("Character outside valid Unicode range: "	514 throw new FormatException(

513 "0x${value.toRadixString(16)}",	515 "Character outside valid Unicode range: "

514 codeUnits, i - extraUnits - 1);	516 "0x${value.toRadixString(16)}",

	517 codeUnits,

	518 i - extraUnits - 1);

515 }	519 }

516 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;	520 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;

517 }	521 }

518 if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) {	522 if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) {

519 _stringSink.writeCharCode(value);	523 _stringSink.writeCharCode(value);

520 }	524 }

521 _isFirstCharacter = false;	525 _isFirstCharacter = false;

522 }	526 }

523	527

524 while (i < endIndex) {	528 while (i < endIndex) {

525 int oneBytes = scanOneByteCharacters(codeUnits, i);	529 int oneBytes = scanOneByteCharacters(codeUnits, i);

526 if (oneBytes > 0) {	530 if (oneBytes > 0) {

527 _isFirstCharacter = false;	531 _isFirstCharacter = false;

528 addSingleBytes(i, i + oneBytes);	532 addSingleBytes(i, i + oneBytes);

529 i += oneBytes;	533 i += oneBytes;

530 if (i == endIndex) break;	534 if (i == endIndex) break;

531 }	535 }

532 int unit = codeUnits[i++];	536 int unit = codeUnits[i++];

533 // TODO(floitsch): the way we test we could potentially allow	537 // TODO(floitsch): the way we test we could potentially allow

534 // units that are too large, if they happen to have the	538 // units that are too large, if they happen to have the

535 // right bit-pattern. (Same is true for the multibyte loop above).	539 // right bit-pattern. (Same is true for the multibyte loop above).

536 // TODO(floitsch): optimize this loop. See:	540 // TODO(floitsch): optimize this loop. See:

537 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80	541 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80

538 if (unit < 0) {	542 if (unit < 0) {

539 // TODO(floitsch): should this be unit <= 0 ?	543 // TODO(floitsch): should this be unit <= 0 ?

540 if (!_allowMalformed) {	544 if (!_allowMalformed) {

541 throw new FormatException(	545 throw new FormatException(

542 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}",	546 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}",

543 codeUnits, i - 1);	547 codeUnits,

	548 i - 1);

544 }	549 }

545 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);	550 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);

546 } else {	551 } else {

547 assert(unit > _ONE_BYTE_LIMIT);	552 assert(unit > _ONE_BYTE_LIMIT);

548 if ((unit & 0xE0) == 0xC0) {	553 if ((unit & 0xE0) == 0xC0) {

549 value = unit & 0x1F;	554 value = unit & 0x1F;

550 expectedUnits = extraUnits = 1;	555 expectedUnits = extraUnits = 1;

551 continue loop;	556 continue loop;

552 }	557 }

553 if ((unit & 0xF0) == 0xE0) {	558 if ((unit & 0xF0) == 0xE0) {

554 value = unit & 0x0F;	559 value = unit & 0x0F;

555 expectedUnits = extraUnits = 2;	560 expectedUnits = extraUnits = 2;

556 continue loop;	561 continue loop;

557 }	562 }

558 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences.	563 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences.

559 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) {	564 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) {

560 value = unit & 0x07;	565 value = unit & 0x07;

561 expectedUnits = extraUnits = 3;	566 expectedUnits = extraUnits = 3;

562 continue loop;	567 continue loop;

563 }	568 }

564 if (!_allowMalformed) {	569 if (!_allowMalformed) {

565 throw new FormatException(	570 throw new FormatException(

566 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}",	571 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}",

567 codeUnits, i - 1);	572 codeUnits,

	573 i - 1);

568 }	574 }

569 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;	575 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;

570 expectedUnits = extraUnits = 0;	576 expectedUnits = extraUnits = 0;

571 _isFirstCharacter = false;	577 _isFirstCharacter = false;

572 _stringSink.writeCharCode(value);	578 _stringSink.writeCharCode(value);

573 }	579 }

574 }	580 }

575 break loop;	581 break loop;

576 }	582 }

577 if (expectedUnits > 0) {	583 if (expectedUnits > 0) {

578 _value = value;	584 _value = value;

579 _expectedUnits = expectedUnits;	585 _expectedUnits = expectedUnits;

580 _extraUnits = extraUnits;	586 _extraUnits = extraUnits;

581 }	587 }

582 }	588 }

583 }	589 }

OLD	NEW

« no previous file with comments | « sdk/lib/convert/string_conversion.dart ('k') | sdk/lib/core/bool.dart » ('j') | no next file with comments »