OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.convert; | 5 part of dart.convert; |
6 | 6 |
7 /** The Unicode Replacement character `U+FFFD` (�). */ | 7 /** The Unicode Replacement character `U+FFFD` (�). */ |
8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; | 8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; |
9 | 9 |
10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */ | 10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */ |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
59 * `U+FFFD` (�). Otherwise it throws a [FormatException]. | 59 * `U+FFFD` (�). Otherwise it throws a [FormatException]. |
60 * | 60 * |
61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that | 61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that |
62 * was used to instantiate `this`. | 62 * was used to instantiate `this`. |
63 */ | 63 */ |
64 String decode(List<int> codeUnits, { bool allowMalformed }) { | 64 String decode(List<int> codeUnits, { bool allowMalformed }) { |
65 if (allowMalformed == null) allowMalformed = _allowMalformed; | 65 if (allowMalformed == null) allowMalformed = _allowMalformed; |
66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); | 66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); |
67 } | 67 } |
68 | 68 |
69 Utf8Encoder get encoder => new Utf8Encoder(); | 69 Utf8Encoder get encoder => const Utf8Encoder(); |
70 Utf8Decoder get decoder { | 70 Utf8Decoder get decoder { |
71 return new Utf8Decoder(allowMalformed: _allowMalformed); | 71 return new Utf8Decoder(allowMalformed: _allowMalformed); |
72 } | 72 } |
73 } | 73 } |
74 | 74 |
75 /** | 75 /** |
76 * This class converts strings to their UTF-8 code units (a list of | 76 * This class converts strings to their UTF-8 code units (a list of |
77 * unsigned 8-bit integers). | 77 * unsigned 8-bit integers). |
78 */ | 78 */ |
79 class Utf8Encoder extends Converter<String, List<int>> { | 79 class Utf8Encoder extends |
| 80 ChunkedConverter<String, List<int>, String, List<int>> { |
80 | 81 |
81 const Utf8Encoder(); | 82 const Utf8Encoder(); |
82 | 83 |
83 /** | 84 /** |
84 * Converts [string] to its UTF-8 code units (a list of | 85 * Converts [string] to its UTF-8 code units (a list of |
85 * unsigned 8-bit integers). | 86 * unsigned 8-bit integers). |
86 * | 87 * |
87 * If [start] and [end] are provided, only the substring | 88 * If [start] and [end] are provided, only the substring |
88 * `string.substring(start, end)` is converted. | 89 * `string.substring(start, end)` is converted. |
89 */ | 90 */ |
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
297 } | 298 } |
298 | 299 |
299 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it | 300 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it |
300 // needs to deal with malformed input. | 301 // needs to deal with malformed input. |
301 } | 302 } |
302 | 303 |
303 /** | 304 /** |
304 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) | 305 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) |
305 * to a string. | 306 * to a string. |
306 */ | 307 */ |
307 class Utf8Decoder extends Converter<List<int>, String> { | 308 class Utf8Decoder extends |
| 309 ChunkedConverter<List<int>, String, List<int>, String> { |
308 final bool _allowMalformed; | 310 final bool _allowMalformed; |
309 | 311 |
310 /** | 312 /** |
311 * Instantiates a new [Utf8Decoder]. | 313 * Instantiates a new [Utf8Decoder]. |
312 * | 314 * |
313 * The optional [allowMalformed] argument defines how [convert] deals | 315 * The optional [allowMalformed] argument defines how [convert] deals |
314 * with invalid or unterminated character sequences. | 316 * with invalid or unterminated character sequences. |
315 * | 317 * |
316 * If it is `true` [convert] replaces invalid (or unterminated) character | 318 * If it is `true` [convert] replaces invalid (or unterminated) character |
317 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 319 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
318 * it throws a [FormatException]. | 320 * it throws a [FormatException]. |
319 */ | 321 */ |
320 const Utf8Decoder({ bool allowMalformed: false }) | 322 const Utf8Decoder({ bool allowMalformed: false }) |
321 : this._allowMalformed = allowMalformed; | 323 : this._allowMalformed = allowMalformed; |
322 | 324 |
323 /** | 325 /** |
324 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 326 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
325 * corresponding string. | 327 * corresponding string. |
326 * | 328 * |
327 * Uses the code units from [start] to, but not including, [end]. | 329 * Uses the code units from [start] to, but not including, [end]. |
328 * If [end] is omitted, it defaults to `codeUnits.length`. | 330 * If [end] is omitted, it defaults to `codeUnits.length`. |
329 * | 331 * |
330 * If the [codeUnits] start with a leading [UNICODE_BOM_CHARACTER_RUNE] this | 332 * If the [codeUnits] start with a leading [UNICODE_BOM_CHARACTER_RUNE] this |
331 * character is discarded. | 333 * character is discarded. |
332 */ | 334 */ |
333 String convert(List<int> codeUnits, [int start = 0, int end]) { | 335 String convert(List<int> codeUnits, [int start = 0, int end]) { |
| 336 // Allow the implementation to intercept and specialize based on the type |
| 337 // of codeUnits. |
| 338 String result = _convertIntercepted(_allowMalformed, codeUnits, start, end); |
| 339 if (result != null) { |
| 340 return result; |
| 341 } |
| 342 |
334 int length = codeUnits.length; | 343 int length = codeUnits.length; |
335 RangeError.checkValidRange(start, end, length); | 344 RangeError.checkValidRange(start, end, length); |
336 if (end == null) end = length; | 345 if (end == null) end = length; |
337 StringBuffer buffer = new StringBuffer(); | 346 StringBuffer buffer = new StringBuffer(); |
338 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); | 347 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); |
339 decoder.convert(codeUnits, start, end); | 348 decoder.convert(codeUnits, start, end); |
340 decoder.close(); | 349 decoder.close(); |
341 return buffer.toString(); | 350 return buffer.toString(); |
342 } | 351 } |
343 | 352 |
(...skipping 10 matching lines...) Expand all Loading... |
354 } else { | 363 } else { |
355 stringSink = new StringConversionSink.from(sink); | 364 stringSink = new StringConversionSink.from(sink); |
356 } | 365 } |
357 return stringSink.asUtf8Sink(_allowMalformed); | 366 return stringSink.asUtf8Sink(_allowMalformed); |
358 } | 367 } |
359 | 368 |
360 // Override the base-classes bind, to provide a better type. | 369 // Override the base-classes bind, to provide a better type. |
361 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream); | 370 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream); |
362 | 371 |
363 external Converter<List<int>,dynamic> fuse(Converter<String, dynamic> next); | 372 external Converter<List<int>,dynamic> fuse(Converter<String, dynamic> next); |
| 373 |
| 374 external static String _convertIntercepted( |
| 375 bool allowMalformed, List<int> codeUnits, int start, int end); |
364 } | 376 } |
365 | 377 |
366 // UTF-8 constants. | 378 // UTF-8 constants. |
367 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits | 379 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits |
368 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits | 380 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits |
369 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits | 381 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits |
370 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max. | 382 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max. |
371 | 383 |
372 // UTF-16 constants. | 384 // UTF-16 constants. |
373 const int _SURROGATE_MASK = 0xF800; | 385 const int _SURROGATE_MASK = 0xF800; |
374 const int _SURROGATE_TAG_MASK = 0xFC00; | 386 const int _SURROGATE_TAG_MASK = 0xFC00; |
375 const int _SURROGATE_VALUE_MASK = 0x3FF; | 387 const int _SURROGATE_VALUE_MASK = 0x3FF; |
376 const int _LEAD_SURROGATE_MIN = 0xD800; | 388 const int _LEAD_SURROGATE_MIN = 0xD800; |
377 const int _TAIL_SURROGATE_MIN = 0xDC00; | 389 const int _TAIL_SURROGATE_MIN = 0xDC00; |
378 | 390 |
379 bool _isSurrogate(int codeUnit) => | |
380 (codeUnit & _SURROGATE_MASK) == _LEAD_SURROGATE_MIN; | |
381 bool _isLeadSurrogate(int codeUnit) => | 391 bool _isLeadSurrogate(int codeUnit) => |
382 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN; | 392 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN; |
383 bool _isTailSurrogate(int codeUnit) => | 393 bool _isTailSurrogate(int codeUnit) => |
384 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN; | 394 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN; |
385 int _combineSurrogatePair(int lead, int tail) => | 395 int _combineSurrogatePair(int lead, int tail) => |
386 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10) | 396 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10) |
387 | (tail & _SURROGATE_VALUE_MASK); | 397 | (tail & _SURROGATE_VALUE_MASK); |
388 | 398 |
389 | |
390 /** | 399 /** |
391 * Decodes UTF-8. | 400 * Decodes UTF-8. |
392 * | 401 * |
393 * The decoder handles chunked input. | 402 * The decoder handles chunked input. |
394 */ | 403 */ |
395 // TODO(floitsch): make this class public. | 404 // TODO(floitsch): make this class public. |
396 class _Utf8Decoder { | 405 class _Utf8Decoder { |
397 final bool _allowMalformed; | 406 final bool _allowMalformed; |
398 final StringSink _stringSink; | 407 final StringSink _stringSink; |
399 bool _isFirstCharacter = true; | 408 bool _isFirstCharacter = true; |
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
555 } | 564 } |
556 break loop; | 565 break loop; |
557 } | 566 } |
558 if (expectedUnits > 0) { | 567 if (expectedUnits > 0) { |
559 _value = value; | 568 _value = value; |
560 _expectedUnits = expectedUnits; | 569 _expectedUnits = expectedUnits; |
561 _extraUnits = extraUnits; | 570 _extraUnits = extraUnits; |
562 } | 571 } |
563 } | 572 } |
564 } | 573 } |
OLD | NEW |