OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.convert; | 5 part of dart.convert; |
6 | 6 |
| 7 /** The Unicode Replacement character `U+FFFD` (�). */ |
| 8 const UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; |
| 9 |
7 /** | 10 /** |
8 * An instance of the default implementation of the [Utf8Codec]. | 11 * An instance of the default implementation of the [Utf8Codec]. |
9 * | 12 * |
10 * This instance provides convenient access to the most common UTF-8 | 13 * This instance provides convenient access to the most common UTF-8 |
11 * use cases. | 14 * use cases. |
12 * | 15 * |
13 * Examples: | 16 * Examples: |
14 * | 17 * |
15 * var encoded = UTF8.encode("Îñţérñåţîöñåļîžåţîờñ"); | 18 * var encoded = UTF8.encode("Îñţérñåţîöñåļîžåţîờñ"); |
16 * var decoded = UTF8.decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, | 19 * var decoded = UTF8.decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, |
17 * 0x72, 0x67, 0x72, 0xc3, 0xb8, 0x64]); | 20 * 0x72, 0x67, 0x72, 0xc3, 0xb8, 0x64]); |
18 */ | 21 */ |
19 const UTF8 = const Utf8Codec(); | 22 const UTF8 = const Utf8Codec(); |
20 | 23 |
21 /** | 24 /** |
22 * A [Utf8Codec] encodes strings to utf-8 code units (bytes) and decodes | 25 * A [Utf8Codec] encodes strings to utf-8 code units (bytes) and decodes |
23 * UTF-8 code units to strings. | 26 * UTF-8 code units to strings. |
24 */ | 27 */ |
25 class Utf8Codec extends _Encoding { | 28 class Utf8Codec extends Encoding { |
26 final bool _allowMalformed; | 29 final bool _allowMalformed; |
27 | 30 |
28 /** | 31 /** |
29 * Instantiates a new [Utf8Codec]. | 32 * Instantiates a new [Utf8Codec]. |
30 * | 33 * |
31 * The optional [allowMalformed] argument defines how [decoder] (and [decode]) | 34 * The optional [allowMalformed] argument defines how [decoder] (and [decode]) |
32 * deal with invalid or unterminated character sequences. | 35 * deal with invalid or unterminated character sequences. |
33 * | 36 * |
34 * If it is `true` (and not overridden at the method invocation) [decode] and | 37 * If it is `true` (and not overridden at the method invocation) [decode] and |
35 * the [decoder] replace invalid (or unterminated) octet | 38 * the [decoder] replace invalid (or unterminated) octet |
36 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 39 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
37 * they throw a [FormatException]. | 40 * they throw a [FormatException]. |
38 */ | 41 */ |
39 const Utf8Codec({ bool allowMalformed: false }) | 42 const Utf8Codec({ bool allowMalformed: false }) |
40 : _allowMalformed = allowMalformed; | 43 : _allowMalformed = allowMalformed; |
41 | 44 |
| 45 String get name => "utf-8"; |
| 46 |
42 /** | 47 /** |
43 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 48 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
44 * corresponding string. | 49 * corresponding string. |
45 * | 50 * |
46 * If [allowMalformed] is `true` the decoder replaces invalid (or | 51 * If [allowMalformed] is `true` the decoder replaces invalid (or |
47 * unterminated) character sequences with the Unicode Replacement character | 52 * unterminated) character sequences with the Unicode Replacement character |
48 * `U+FFFD` (�). Otherwise it throws a [FormatException]. | 53 * `U+FFFD` (�). Otherwise it throws a [FormatException]. |
49 * | 54 * |
50 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that | 55 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that |
51 * was used to instantiate `this`. | 56 * was used to instantiate `this`. |
52 */ | 57 */ |
53 String decode(List<int> codeUnits, { bool allowMalformed }) { | 58 String decode(List<int> codeUnits, { bool allowMalformed }) { |
54 if (allowMalformed == null) allowMalformed = _allowMalformed; | 59 if (allowMalformed == null) allowMalformed = _allowMalformed; |
55 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); | 60 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); |
56 } | 61 } |
57 | 62 |
58 Converter<String, List<int>> get encoder => new Utf8Encoder(); | 63 Converter<String, List<int>> get encoder => new Utf8Encoder(); |
59 Converter<List<int>, String> get decoder { | 64 Converter<List<int>, String> get decoder { |
60 return new Utf8Decoder(allowMalformed: _allowMalformed); | 65 return new Utf8Decoder(allowMalformed: _allowMalformed); |
61 } | 66 } |
62 } | 67 } |
63 | 68 |
64 /** | 69 /** |
65 * This class converts strings to their UTF-8 code units (a list of | 70 * This class converts strings to their UTF-8 code units (a list of |
66 * unsigned 8-bit integers). | 71 * unsigned 8-bit integers). |
67 */ | 72 */ |
68 class Utf8Encoder extends Converter<String, List<int>> { | 73 class Utf8Encoder extends Converter<String, List<int>> { |
| 74 |
| 75 const Utf8Encoder(); |
| 76 |
69 /** | 77 /** |
70 * Converts [string] to its UTF-8 code units (a list of | 78 * Converts [string] to its UTF-8 code units (a list of |
71 * unsigned 8-bit integers). | 79 * unsigned 8-bit integers). |
72 */ | 80 */ |
73 List<int> convert(String string) { | 81 List<int> convert(String string) { |
74 // Create a new encoder with a length that is guaranteed to be big enough. | 82 // Create a new encoder with a length that is guaranteed to be big enough. |
75 // A single code unit uses at most 3 bytes. Two code units at most 4. | 83 // A single code unit uses at most 3 bytes. Two code units at most 4. |
76 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); | 84 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); |
77 int endPosition = encoder._fillBuffer(string, 0, string.length); | 85 int endPosition = encoder._fillBuffer(string, 0, string.length); |
78 assert(endPosition >= string.length - 1); | 86 assert(endPosition >= string.length - 1); |
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
282 /** | 290 /** |
283 * Instantiates a new [Utf8Decoder]. | 291 * Instantiates a new [Utf8Decoder]. |
284 * | 292 * |
285 * The optional [allowMalformed] argument defines how [convert] deals | 293 * The optional [allowMalformed] argument defines how [convert] deals |
286 * with invalid or unterminated character sequences. | 294 * with invalid or unterminated character sequences. |
287 * | 295 * |
288 * If it is `true` [convert] replaces invalid (or unterminated) character | 296 * If it is `true` [convert] replaces invalid (or unterminated) character |
289 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 297 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
290 * it throws a [FormatException]. | 298 * it throws a [FormatException]. |
291 */ | 299 */ |
292 Utf8Decoder({ bool allowMalformed: false }) | 300 const Utf8Decoder({ bool allowMalformed: false }) |
293 : this._allowMalformed = allowMalformed; | 301 : this._allowMalformed = allowMalformed; |
294 | 302 |
295 /** | 303 /** |
296 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 304 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
297 * corresponding string. | 305 * corresponding string. |
298 */ | 306 */ |
299 String convert(List<int> codeUnits) { | 307 String convert(List<int> codeUnits) { |
300 StringBuffer buffer = new StringBuffer(); | 308 StringBuffer buffer = new StringBuffer(); |
301 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); | 309 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); |
302 decoder.convert(codeUnits, 0, codeUnits.length); | 310 decoder.convert(codeUnits, 0, codeUnits.length); |
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
499 } | 507 } |
500 break loop; | 508 break loop; |
501 } | 509 } |
502 if (expectedUnits > 0) { | 510 if (expectedUnits > 0) { |
503 _value = value; | 511 _value = value; |
504 _expectedUnits = expectedUnits; | 512 _expectedUnits = expectedUnits; |
505 _extraUnits = extraUnits; | 513 _extraUnits = extraUnits; |
506 } | 514 } |
507 } | 515 } |
508 } | 516 } |
OLD | NEW |