Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of dart.convert; | 5 part of dart.convert; |
| 6 | 6 |
| 7 /** The Unicode Replacement character `U+FFFD` (�). */ | 7 /** The Unicode Replacement character `U+FFFD` (�). */ |
| 8 const UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; | 8 const UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; |
| 9 | 9 |
| | 10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */ |
| | 11 const UNICODE_BOM_CHARACTER_RUNE = 0xFEFF; |
| | 12 |
| 10 /** | 13 /** |
| 11 * An instance of the default implementation of the [Utf8Codec]. | 14 * An instance of the default implementation of the [Utf8Codec]. |
| 12 * | 15 * |
| 13 * This instance provides a convenient access to the most common UTF-8 | 16 * This instance provides a convenient access to the most common UTF-8 |
| 14 * use cases. | 17 * use cases. |
| 15 * | 18 * |
| 16 * Examples: | 19 * Examples: |
| 17 * | 20 * |
| 18 * var encoded = UTF8.encode("Îñţérñåţîöñåļîžåţîờñ"); | 21 * var encoded = UTF8.encode("Îñţérñåţîöñåļîžåţîờñ"); |
| 19 * var decoded = UTF8.decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, | 22 * var decoded = UTF8.decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 41 */ | 44 */ |
| 42 const Utf8Codec({ bool allowMalformed: false }) | 45 const Utf8Codec({ bool allowMalformed: false }) |
| 43 : _allowMalformed = allowMalformed; | 46 : _allowMalformed = allowMalformed; |
| 44 | 47 |
| 45 String get name => "utf-8"; | 48 String get name => "utf-8"; |
| 46 | 49 |
| 47 /** | 50 /** |
| 48 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 51 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
| 49 * corresponding string. | 52 * corresponding string. |
| 50 * | 53 * |
| | 54 * If the [codeUnits] start with a leading [UNICODE_BOM_CHARACTER_RUNE] this |
|
Anders Johnsen
2013/10/01 11:32:29
Is this really happening? I don't see any usage of
floitsch
2013/10/01 11:37:42
There was a private copy of it. I removed that one
| |
| | 55 * character is discarded. |
| | 56 * |
| 51 * If [allowMalformed] is `true` the decoder replaces invalid (or | 57 * If [allowMalformed] is `true` the decoder replaces invalid (or |
| 52 * unterminated) character sequences with the Unicode Replacement character | 58 * unterminated) character sequences with the Unicode Replacement character |
| 53 * `U+FFFD` (�). Otherwise it throws a [FormatException]. | 59 * `U+FFFD` (�). Otherwise it throws a [FormatException]. |
| 54 * | 60 * |
| 55 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that | 61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that |
| 56 * was used to instantiate `this`. | 62 * was used to instantiate `this`. |
| 57 */ | 63 */ |
| 58 String decode(List<int> codeUnits, { bool allowMalformed }) { | 64 String decode(List<int> codeUnits, { bool allowMalformed }) { |
| 59 if (allowMalformed == null) allowMalformed = _allowMalformed; | 65 if (allowMalformed == null) allowMalformed = _allowMalformed; |
| 60 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); | 66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); |
| (...skipping 235 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 296 * If it is `true` [convert] replaces invalid (or unterminated) character | 302 * If it is `true` [convert] replaces invalid (or unterminated) character |
| 297 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 303 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
| 298 * it throws a [FormatException]. | 304 * it throws a [FormatException]. |
| 299 */ | 305 */ |
| 300 const Utf8Decoder({ bool allowMalformed: false }) | 306 const Utf8Decoder({ bool allowMalformed: false }) |
| 301 : this._allowMalformed = allowMalformed; | 307 : this._allowMalformed = allowMalformed; |
| 302 | 308 |
| 303 /** | 309 /** |
| 304 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 310 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
| 305 * corresponding string. | 311 * corresponding string. |
| | 312 * |
| | 313 * If the [codeUnits] start with a leading [UNICODE_BOM_CHARACTER_RUNE] this |
| | 314 * character is discarded. |
| 306 */ | 315 */ |
| 307 String convert(List<int> codeUnits) { | 316 String convert(List<int> codeUnits) { |
| 308 StringBuffer buffer = new StringBuffer(); | 317 StringBuffer buffer = new StringBuffer(); |
| 309 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); | 318 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); |
| 310 decoder.convert(codeUnits, 0, codeUnits.length); | 319 decoder.convert(codeUnits, 0, codeUnits.length); |
| 311 decoder.close(); | 320 decoder.close(); |
| 312 return buffer.toString(); | 321 return buffer.toString(); |
| 313 } | 322 } |
| 314 | 323 |
| 315 /** | 324 /** |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 349 const int _REPLACEMENT_CHARACTER = 0xFFFD; | 358 const int _REPLACEMENT_CHARACTER = 0xFFFD; |
| 350 const int _BOM_CHARACTER = 0xFEFF; | 359 const int _BOM_CHARACTER = 0xFEFF; |
| 351 | 360 |
| 352 bool _isSurrogate(int codeUnit) => | 361 bool _isSurrogate(int codeUnit) => |
| 353 (codeUnit & _SURROGATE_MASK) == _LEAD_SURROGATE_MIN; | 362 (codeUnit & _SURROGATE_MASK) == _LEAD_SURROGATE_MIN; |
| 354 bool _isLeadSurrogate(int codeUnit) => | 363 bool _isLeadSurrogate(int codeUnit) => |
| 355 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN; | 364 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN; |
| 356 bool _isTailSurrogate(int codeUnit) => | 365 bool _isTailSurrogate(int codeUnit) => |
| 357 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN; | 366 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN; |
| 358 int _combineSurrogatePair(int lead, int tail) => | 367 int _combineSurrogatePair(int lead, int tail) => |
| 359 0x10000 | ((lead & _SURROGATE_VALUE_MASK) << 10) | 368 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10) |
| 360 | (tail & _SURROGATE_VALUE_MASK); | 369 | (tail & _SURROGATE_VALUE_MASK); |
| 361 | 370 |
| 362 | 371 |
| 363 /** | 372 /** |
| 364 * Decodes UTF-8. | 373 * Decodes UTF-8. |
| 365 * | 374 * |
| 366 * The decoder handles chunked input. | 375 * The decoder handles chunked input. |
| 367 */ | 376 */ |
| 368 // TODO(floitsch): make this class public. | 377 // TODO(floitsch): make this class public. |
| 369 class _Utf8Decoder { | 378 class _Utf8Decoder { |
| (...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 507 } | 516 } |
| 508 break loop; | 517 break loop; |
| 509 } | 518 } |
| 510 if (expectedUnits > 0) { | 519 if (expectedUnits > 0) { |
| 511 _value = value; | 520 _value = value; |
| 512 _expectedUnits = expectedUnits; | 521 _expectedUnits = expectedUnits; |
| 513 _extraUnits = extraUnits; | 522 _extraUnits = extraUnits; |
| 514 } | 523 } |
| 515 } | 524 } |
| 516 } | 525 } |
| OLD | NEW |