OLD | NEW |
---|---|
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.convert; | 5 part of dart.convert; |
6 | 6 |
7 /** The Unicode Replacement character `U+FFFD` (�). */ | 7 /** The Unicode Replacement character `U+FFFD` (�). */ |
8 const UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; | 8 const UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; |
9 | 9 |
10 /** The Unicode Byte Order Mark (BOM) character `U+FEFF`. */ | |
11 const UNICODE_BOM_CHARACTER_RUNE = 0xFEFF; | |
12 | |
10 /** | 13 /** |
11 * An instance of the default implementation of the [Utf8Codec]. | 14 * An instance of the default implementation of the [Utf8Codec]. |
12 * | 15 * |
13 * This instance provides convenient access to the most common UTF-8 | 16 * This instance provides convenient access to the most common UTF-8 |
14 * use cases. | 17 * use cases. |
15 * | 18 * |
16 * Examples: | 19 * Examples: |
17 * | 20 * |
18 * var encoded = UTF8.encode("Îñţérñåţîöñåļîžåţîờñ"); | 21 * var encoded = UTF8.encode("Îñţérñåţîöñåļîžåţîờñ"); |
19 * var decoded = UTF8.decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, | 22 * var decoded = UTF8.decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, |
(...skipping 21 matching lines...) | |
41 */ | 44 */ |
42 const Utf8Codec({ bool allowMalformed: false }) | 45 const Utf8Codec({ bool allowMalformed: false }) |
43 : _allowMalformed = allowMalformed; | 46 : _allowMalformed = allowMalformed; |
44 | 47 |
45 String get name => "utf-8"; | 48 String get name => "utf-8"; |
46 | 49 |
47 /** | 50 /** |
48 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 51 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
49 * corresponding string. | 52 * corresponding string. |
50 * | 53 * |
54 * If the [codeUnits] start with a leading [UNICODE_BOM_CHARACTER_RUNE] this | |
Anders Johnsen
2013/10/01 11:32:29
Is this really happening? I don't see any usage of…
floitsch
2013/10/01 11:37:42
There was a private copy of it. I removed that one
| |
55 * character is discarded. | |
56 * | |
51 * If [allowMalformed] is `true` the decoder replaces invalid (or | 57 * If [allowMalformed] is `true` the decoder replaces invalid (or |
52 * unterminated) character sequences with the Unicode Replacement character | 58 * unterminated) character sequences with the Unicode Replacement character |
53 * `U+FFFD` (�). Otherwise it throws a [FormatException]. | 59 * `U+FFFD` (�). Otherwise it throws a [FormatException]. |
54 * | 60 * |
55 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that | 61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that |
56 * was used to instantiate `this`. | 62 * was used to instantiate `this`. |
57 */ | 63 */ |
58 String decode(List<int> codeUnits, { bool allowMalformed }) { | 64 String decode(List<int> codeUnits, { bool allowMalformed }) { |
59 if (allowMalformed == null) allowMalformed = _allowMalformed; | 65 if (allowMalformed == null) allowMalformed = _allowMalformed; |
60 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); | 66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); |
(...skipping 235 matching lines...) | |
296 * If it is `true` [convert] replaces invalid (or unterminated) character | 302 * If it is `true` [convert] replaces invalid (or unterminated) character |
297 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 303 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
298 * it throws a [FormatException]. | 304 * it throws a [FormatException]. |
299 */ | 305 */ |
300 const Utf8Decoder({ bool allowMalformed: false }) | 306 const Utf8Decoder({ bool allowMalformed: false }) |
301 : this._allowMalformed = allowMalformed; | 307 : this._allowMalformed = allowMalformed; |
302 | 308 |
303 /** | 309 /** |
304 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 310 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
305 * corresponding string. | 311 * corresponding string. |
312 * | |
313 * If the [codeUnits] start with a leading [UNICODE_BOM_CHARACTER_RUNE] this | |
314 * character is discarded. | |
306 */ | 315 */ |
307 String convert(List<int> codeUnits) { | 316 String convert(List<int> codeUnits) { |
308 StringBuffer buffer = new StringBuffer(); | 317 StringBuffer buffer = new StringBuffer(); |
309 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); | 318 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); |
310 decoder.convert(codeUnits, 0, codeUnits.length); | 319 decoder.convert(codeUnits, 0, codeUnits.length); |
311 decoder.close(); | 320 decoder.close(); |
312 return buffer.toString(); | 321 return buffer.toString(); |
313 } | 322 } |
314 | 323 |
315 /** | 324 /** |
(...skipping 33 matching lines...) | |
349 const int _REPLACEMENT_CHARACTER = 0xFFFD; | 358 const int _REPLACEMENT_CHARACTER = 0xFFFD; |
350 const int _BOM_CHARACTER = 0xFEFF; | 359 const int _BOM_CHARACTER = 0xFEFF; |
351 | 360 |
352 bool _isSurrogate(int codeUnit) => | 361 bool _isSurrogate(int codeUnit) => |
353 (codeUnit & _SURROGATE_MASK) == _LEAD_SURROGATE_MIN; | 362 (codeUnit & _SURROGATE_MASK) == _LEAD_SURROGATE_MIN; |
354 bool _isLeadSurrogate(int codeUnit) => | 363 bool _isLeadSurrogate(int codeUnit) => |
355 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN; | 364 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN; |
356 bool _isTailSurrogate(int codeUnit) => | 365 bool _isTailSurrogate(int codeUnit) => |
357 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN; | 366 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN; |
358 int _combineSurrogatePair(int lead, int tail) => | 367 int _combineSurrogatePair(int lead, int tail) => |
359 0x10000 | ((lead & _SURROGATE_VALUE_MASK) << 10) | 368 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10) |
360 | (tail & _SURROGATE_VALUE_MASK); | 369 | (tail & _SURROGATE_VALUE_MASK); |
361 | 370 |
362 | 371 |
363 /** | 372 /** |
364 * Decodes UTF-8. | 373 * Decodes UTF-8. |
365 * | 374 * |
366 * The decoder handles chunked input. | 375 * The decoder handles chunked input. |
367 */ | 376 */ |
368 // TODO(floitsch): make this class public. | 377 // TODO(floitsch): make this class public. |
369 class _Utf8Decoder { | 378 class _Utf8Decoder { |
(...skipping 137 matching lines...) | |
507 } | 516 } |
508 break loop; | 517 break loop; |
509 } | 518 } |
510 if (expectedUnits > 0) { | 519 if (expectedUnits > 0) { |
511 _value = value; | 520 _value = value; |
512 _expectedUnits = expectedUnits; | 521 _expectedUnits = expectedUnits; |
513 _extraUnits = extraUnits; | 522 _extraUnits = extraUnits; |
514 } | 523 } |
515 } | 524 } |
516 } | 525 } |
OLD | NEW |