Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(65)

Side by Side Diff: tool/input_sdk/lib/convert/utf.dart

Issue 1965563003: Update dart:convert and dart:core Uri. (Closed) Base URL: https://github.com/dart-lang/dev_compiler.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.convert; 5 part of dart.convert;
6 6
7 /** The Unicode Replacement character `U+FFFD` (�). */ 7 /** The Unicode Replacement character `U+FFFD` (�). */
8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; 8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD;
9 9
10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */ 10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 * `U+FFFD` (�). Otherwise it throws a [FormatException]. 59 * `U+FFFD` (�). Otherwise it throws a [FormatException].
60 * 60 *
61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that 61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that
62 * was used to instantiate `this`. 62 * was used to instantiate `this`.
63 */ 63 */
64 String decode(List<int> codeUnits, { bool allowMalformed }) { 64 String decode(List<int> codeUnits, { bool allowMalformed }) {
65 if (allowMalformed == null) allowMalformed = _allowMalformed; 65 if (allowMalformed == null) allowMalformed = _allowMalformed;
66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); 66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits);
67 } 67 }
68 68
69 Utf8Encoder get encoder => new Utf8Encoder(); 69 Utf8Encoder get encoder => const Utf8Encoder();
70 Utf8Decoder get decoder { 70 Utf8Decoder get decoder {
71 return new Utf8Decoder(allowMalformed: _allowMalformed); 71 return new Utf8Decoder(allowMalformed: _allowMalformed);
72 } 72 }
73 } 73 }
74 74
75 /** 75 /**
76 * This class converts strings to their UTF-8 code units (a list of 76 * This class converts strings to their UTF-8 code units (a list of
77 * unsigned 8-bit integers). 77 * unsigned 8-bit integers).
78 */ 78 */
79 class Utf8Encoder extends Converter<String, List<int>> { 79 class Utf8Encoder extends
80 ChunkedConverter<String, List<int>, String, List<int>> {
80 81
81 const Utf8Encoder(); 82 const Utf8Encoder();
82 83
83 /** 84 /**
84 * Converts [string] to its UTF-8 code units (a list of 85 * Converts [string] to its UTF-8 code units (a list of
85 * unsigned 8-bit integers). 86 * unsigned 8-bit integers).
86 * 87 *
87 * If [start] and [end] are provided, only the substring 88 * If [start] and [end] are provided, only the substring
88 * `string.substring(start, end)` is converted. 89 * `string.substring(start, end)` is converted.
89 */ 90 */
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after
297 } 298 }
298 299
299 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it 300 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it
300 // needs to deal with malformed input. 301 // needs to deal with malformed input.
301 } 302 }
302 303
303 /** 304 /**
304 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) 305 * This class converts UTF-8 code units (lists of unsigned 8-bit integers)
305 * to a string. 306 * to a string.
306 */ 307 */
307 class Utf8Decoder extends Converter<List<int>, String> { 308 class Utf8Decoder extends
309 ChunkedConverter<List<int>, String, List<int>, String> {
308 final bool _allowMalformed; 310 final bool _allowMalformed;
309 311
310 /** 312 /**
311 * Instantiates a new [Utf8Decoder]. 313 * Instantiates a new [Utf8Decoder].
312 * 314 *
313 * The optional [allowMalformed] argument defines how [convert] deals 315 * The optional [allowMalformed] argument defines how [convert] deals
314 * with invalid or unterminated character sequences. 316 * with invalid or unterminated character sequences.
315 * 317 *
316 * If it is `true` [convert] replaces invalid (or unterminated) character 318 * If it is `true` [convert] replaces invalid (or unterminated) character
317 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise 319 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise
318 * it throws a [FormatException]. 320 * it throws a [FormatException].
319 */ 321 */
320 const Utf8Decoder({ bool allowMalformed: false }) 322 const Utf8Decoder({ bool allowMalformed: false })
321 : this._allowMalformed = allowMalformed; 323 : this._allowMalformed = allowMalformed;
322 324
323 /** 325 /**
324 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the 326 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the
325 * corresponding string. 327 * corresponding string.
326 * 328 *
327 * Uses the code units from [start] to, but not including, [end]. 329 * Uses the code units from [start] to, but not including, [end].
328 * If [end] is omitted, it defaults to `codeUnits.length`. 330 * If [end] is omitted, it defaults to `codeUnits.length`.
329 * 331 *
330 * If the [codeUnits] start with a leading [UNICODE_BOM_CHARACTER_RUNE] this 332 * If the [codeUnits] start with a leading [UNICODE_BOM_CHARACTER_RUNE] this
331 * character is discarded. 333 * character is discarded.
332 */ 334 */
333 String convert(List<int> codeUnits, [int start = 0, int end]) { 335 String convert(List<int> codeUnits, [int start = 0, int end]) {
336 // Allow the implementation to intercept and specialize based on the type
337 // of codeUnits.
338 String result = _convertIntercepted(_allowMalformed, codeUnits, start, end);
339 if (result != null) {
340 return result;
341 }
342
334 int length = codeUnits.length; 343 int length = codeUnits.length;
335 RangeError.checkValidRange(start, end, length); 344 RangeError.checkValidRange(start, end, length);
336 if (end == null) end = length; 345 if (end == null) end = length;
337 StringBuffer buffer = new StringBuffer(); 346 StringBuffer buffer = new StringBuffer();
338 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); 347 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed);
339 decoder.convert(codeUnits, start, end); 348 decoder.convert(codeUnits, start, end);
340 decoder.close(); 349 decoder.close();
341 return buffer.toString(); 350 return buffer.toString();
342 } 351 }
343 352
(...skipping 10 matching lines...) Expand all
354 } else { 363 } else {
355 stringSink = new StringConversionSink.from(sink); 364 stringSink = new StringConversionSink.from(sink);
356 } 365 }
357 return stringSink.asUtf8Sink(_allowMalformed); 366 return stringSink.asUtf8Sink(_allowMalformed);
358 } 367 }
359 368
360 // Override the base class's bind, to provide a better type. 369 // Override the base class's bind, to provide a better type.
361 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream); 370 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);
362 371
363 external Converter<List<int>,dynamic> fuse(Converter<String, dynamic> next); 372 external Converter<List<int>,dynamic> fuse(Converter<String, dynamic> next);
373
374 external static String _convertIntercepted(
375 bool allowMalformed, List<int> codeUnits, int start, int end);
364 } 376 }
365 377
366 // UTF-8 constants. 378 // UTF-8 constants.
367 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits 379 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits
368 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits 380 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits
369 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits 381 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits
370 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max. 382 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max.
371 383
372 // UTF-16 constants. 384 // UTF-16 constants.
373 const int _SURROGATE_MASK = 0xF800; 385 const int _SURROGATE_MASK = 0xF800;
374 const int _SURROGATE_TAG_MASK = 0xFC00; 386 const int _SURROGATE_TAG_MASK = 0xFC00;
375 const int _SURROGATE_VALUE_MASK = 0x3FF; 387 const int _SURROGATE_VALUE_MASK = 0x3FF;
376 const int _LEAD_SURROGATE_MIN = 0xD800; 388 const int _LEAD_SURROGATE_MIN = 0xD800;
377 const int _TAIL_SURROGATE_MIN = 0xDC00; 389 const int _TAIL_SURROGATE_MIN = 0xDC00;
378 390
379 bool _isSurrogate(int codeUnit) =>
380 (codeUnit & _SURROGATE_MASK) == _LEAD_SURROGATE_MIN;
381 bool _isLeadSurrogate(int codeUnit) => 391 bool _isLeadSurrogate(int codeUnit) =>
382 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN; 392 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN;
383 bool _isTailSurrogate(int codeUnit) => 393 bool _isTailSurrogate(int codeUnit) =>
384 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN; 394 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN;
385 int _combineSurrogatePair(int lead, int tail) => 395 int _combineSurrogatePair(int lead, int tail) =>
386 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10) 396 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10)
387 | (tail & _SURROGATE_VALUE_MASK); 397 | (tail & _SURROGATE_VALUE_MASK);
388 398
389
390 /** 399 /**
391 * Decodes UTF-8. 400 * Decodes UTF-8.
392 * 401 *
393 * The decoder handles chunked input. 402 * The decoder handles chunked input.
394 */ 403 */
395 // TODO(floitsch): make this class public. 404 // TODO(floitsch): make this class public.
396 class _Utf8Decoder { 405 class _Utf8Decoder {
397 final bool _allowMalformed; 406 final bool _allowMalformed;
398 final StringSink _stringSink; 407 final StringSink _stringSink;
399 bool _isFirstCharacter = true; 408 bool _isFirstCharacter = true;
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
555 } 564 }
556 break loop; 565 break loop;
557 } 566 }
558 if (expectedUnits > 0) { 567 if (expectedUnits > 0) {
559 _value = value; 568 _value = value;
560 _expectedUnits = expectedUnits; 569 _expectedUnits = expectedUnits;
561 _extraUnits = extraUnits; 570 _extraUnits = extraUnits;
562 } 571 }
563 } 572 }
564 } 573 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698