| OLD | NEW |
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of dart.convert; | 5 part of dart.convert; |
| 6 | 6 |
| 7 /** The Unicode Replacement character `U+FFFD` (�). */ |
| 8 const UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; |
| 9 |
| 7 /** | 10 /** |
| 8 * An instance of the default implementation of the [Utf8Codec]. | 11 * An instance of the default implementation of the [Utf8Codec]. |
| 9 * | 12 * |
| 10 * This instance provides convenient access to the most common UTF-8 | 13 * This instance provides convenient access to the most common UTF-8 |
| 11 * use cases. | 14 * use cases. |
| 12 * | 15 * |
| 13 * Examples: | 16 * Examples: |
| 14 * | 17 * |
| 15 * var encoded = UTF8.encode("Îñţérñåţîöñåļîžåţîờñ"); | 18 * var encoded = UTF8.encode("Îñţérñåţîöñåļîžåţîờñ"); |
| 16 * var decoded = UTF8.decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, | 19 * var decoded = UTF8.decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, |
| 17 * 0x72, 0x67, 0x72, 0xc3, 0xb8, 0x64]); | 20 * 0x72, 0x67, 0x72, 0xc3, 0xb8, 0x64]); |
| 18 */ | 21 */ |
| 19 const UTF8 = const Utf8Codec(); | 22 const UTF8 = const Utf8Codec(); |
| 20 | 23 |
| 21 /** | 24 /** |
| 22 * A [Utf8Codec] encodes strings to UTF-8 code units (bytes) and decodes | 25 * A [Utf8Codec] encodes strings to UTF-8 code units (bytes) and decodes |
| 23 * UTF-8 code units to strings. | 26 * UTF-8 code units to strings. |
| 24 */ | 27 */ |
| 25 class Utf8Codec extends _Encoding { | 28 class Utf8Codec extends Encoding { |
| 26 final bool _allowMalformed; | 29 final bool _allowMalformed; |
| 27 | 30 |
| 28 /** | 31 /** |
| 29 * Instantiates a new [Utf8Codec]. | 32 * Instantiates a new [Utf8Codec]. |
| 30 * | 33 * |
| 31 * The optional [allowMalformed] argument defines how [decoder] (and [decode]) | 34 * The optional [allowMalformed] argument defines how [decoder] (and [decode]) |
| 32 * deal with invalid or unterminated character sequences. | 35 * deal with invalid or unterminated character sequences. |
| 33 * | 36 * |
| 34 * If it is `true` (and not overridden at the method invocation) [decode] and | 37 * If it is `true` (and not overridden at the method invocation) [decode] and |
| 35 * the [decoder] replace invalid (or unterminated) octet | 38 * the [decoder] replace invalid (or unterminated) octet |
| 36 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 39 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
| 37 * they throw a [FormatException]. | 40 * they throw a [FormatException]. |
| 38 */ | 41 */ |
| 39 const Utf8Codec({ bool allowMalformed: false }) | 42 const Utf8Codec({ bool allowMalformed: false }) |
| 40 : _allowMalformed = allowMalformed; | 43 : _allowMalformed = allowMalformed; |
| 41 | 44 |
| 45 String get name => "utf-8"; |
| 46 |
| 42 /** | 47 /** |
| 43 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 48 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
| 44 * corresponding string. | 49 * corresponding string. |
| 45 * | 50 * |
| 46 * If [allowMalformed] is `true` the decoder replaces invalid (or | 51 * If [allowMalformed] is `true` the decoder replaces invalid (or |
| 47 * unterminated) character sequences with the Unicode Replacement character | 52 * unterminated) character sequences with the Unicode Replacement character |
| 48 * `U+FFFD` (�). Otherwise it throws a [FormatException]. | 53 * `U+FFFD` (�). Otherwise it throws a [FormatException]. |
| 49 * | 54 * |
| 50 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that | 55 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that |
| 51 * was used to instantiate `this`. | 56 * was used to instantiate `this`. |
| 52 */ | 57 */ |
| 53 String decode(List<int> codeUnits, { bool allowMalformed }) { | 58 String decode(List<int> codeUnits, { bool allowMalformed }) { |
| 54 if (allowMalformed == null) allowMalformed = _allowMalformed; | 59 if (allowMalformed == null) allowMalformed = _allowMalformed; |
| 55 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); | 60 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); |
| 56 } | 61 } |
| 57 | 62 |
| 58 Converter<String, List<int>> get encoder => new Utf8Encoder(); | 63 Converter<String, List<int>> get encoder => new Utf8Encoder(); |
| 59 Converter<List<int>, String> get decoder { | 64 Converter<List<int>, String> get decoder { |
| 60 return new Utf8Decoder(allowMalformed: _allowMalformed); | 65 return new Utf8Decoder(allowMalformed: _allowMalformed); |
| 61 } | 66 } |
| 62 } | 67 } |
| 63 | 68 |
| 64 /** | 69 /** |
| 65 * This class converts strings to their UTF-8 code units (a list of | 70 * This class converts strings to their UTF-8 code units (a list of |
| 66 * unsigned 8-bit integers). | 71 * unsigned 8-bit integers). |
| 67 */ | 72 */ |
| 68 class Utf8Encoder extends Converter<String, List<int>> { | 73 class Utf8Encoder extends Converter<String, List<int>> { |
| 74 |
| 75 const Utf8Encoder(); |
| 76 |
| 69 /** | 77 /** |
| 70 * Converts [string] to its UTF-8 code units (a list of | 78 * Converts [string] to its UTF-8 code units (a list of |
| 71 * unsigned 8-bit integers). | 79 * unsigned 8-bit integers). |
| 72 */ | 80 */ |
| 73 List<int> convert(String string) { | 81 List<int> convert(String string) { |
| 74 // Create a new encoder with a length that is guaranteed to be big enough. | 82 // Create a new encoder with a length that is guaranteed to be big enough. |
| 75 // A single code unit uses at most 3 bytes. Two code units at most 4. | 83 // A single code unit uses at most 3 bytes. Two code units at most 4. |
| 76 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); | 84 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); |
| 77 int endPosition = encoder._fillBuffer(string, 0, string.length); | 85 int endPosition = encoder._fillBuffer(string, 0, string.length); |
| 78 assert(endPosition >= string.length - 1); | 86 assert(endPosition >= string.length - 1); |
| (...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 282 /** | 290 /** |
| 283 * Instantiates a new [Utf8Decoder]. | 291 * Instantiates a new [Utf8Decoder]. |
| 284 * | 292 * |
| 285 * The optional [allowMalformed] argument defines how [convert] deals | 293 * The optional [allowMalformed] argument defines how [convert] deals |
| 286 * with invalid or unterminated character sequences. | 294 * with invalid or unterminated character sequences. |
| 287 * | 295 * |
| 288 * If it is `true` [convert] replaces invalid (or unterminated) character | 296 * If it is `true` [convert] replaces invalid (or unterminated) character |
| 289 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 297 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
| 290 * it throws a [FormatException]. | 298 * it throws a [FormatException]. |
| 291 */ | 299 */ |
| 292 Utf8Decoder({ bool allowMalformed: false }) | 300 const Utf8Decoder({ bool allowMalformed: false }) |
| 293 : this._allowMalformed = allowMalformed; | 301 : this._allowMalformed = allowMalformed; |
| 294 | 302 |
| 295 /** | 303 /** |
| 296 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 304 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
| 297 * corresponding string. | 305 * corresponding string. |
| 298 */ | 306 */ |
| 299 String convert(List<int> codeUnits) { | 307 String convert(List<int> codeUnits) { |
| 300 StringBuffer buffer = new StringBuffer(); | 308 StringBuffer buffer = new StringBuffer(); |
| 301 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); | 309 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); |
| 302 decoder.convert(codeUnits, 0, codeUnits.length); | 310 decoder.convert(codeUnits, 0, codeUnits.length); |
| (...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 499 } | 507 } |
| 500 break loop; | 508 break loop; |
| 501 } | 509 } |
| 502 if (expectedUnits > 0) { | 510 if (expectedUnits > 0) { |
| 503 _value = value; | 511 _value = value; |
| 504 _expectedUnits = expectedUnits; | 512 _expectedUnits = expectedUnits; |
| 505 _extraUnits = extraUnits; | 513 _extraUnits = extraUnits; |
| 506 } | 514 } |
| 507 } | 515 } |
| 508 } | 516 } |
| OLD | NEW |