| Index: packages/utf/lib/src/utf8.dart
|
| diff --git a/packages/utf/lib/src/utf/utf8.dart b/packages/utf/lib/src/utf8.dart
|
| similarity index 79%
|
| rename from packages/utf/lib/src/utf/utf8.dart
|
| rename to packages/utf/lib/src/utf8.dart
|
| index ff1b1ed81e6079b529d85649071ffa7c0a838541..ecf870768e8f08b06c11f5f684af185431742906 100644
|
| --- a/packages/utf/lib/src/utf/utf8.dart
|
| +++ b/packages/utf/lib/src/utf8.dart
|
| @@ -2,7 +2,13 @@
|
| // for details. All rights reserved. Use of this source code is governed by a
|
| // BSD-style license that can be found in the LICENSE file.
|
|
|
| -part of utf;
|
| +library utf.utf8;
|
| +
|
| +import "dart:collection";
|
| +
|
| +import 'constants.dart';
|
| +import 'list_range.dart';
|
| +import 'shared.dart';
|
|
|
| const int _UTF8_ONE_BYTE_MAX = 0x7f;
|
| const int _UTF8_TWO_BYTE_MAX = 0x7ff;
|
| @@ -28,7 +34,8 @@ const int _UTF8_SUBSEQUENT_BYTE_BASE = 0x80;
|
| * as much of the input as needed. Set the replacementCharacter to null to
|
| * throw an ArgumentError rather than replace the bad value.
|
| */
|
| -IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes, [int offset = 0,
|
| +IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes,
|
| + [int offset = 0,
|
| int length,
|
| int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
|
| return new IterableUtf8Decoder(bytes, offset, length, replacementCodepoint);
|
| @@ -41,23 +48,24 @@ IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes, [int offset = 0,
|
| * Set the replacementCharacter to null to throw an ArgumentError
|
| * rather than replace the bad value.
|
| */
|
| -String decodeUtf8(List<int> bytes, [int offset = 0, int length,
|
| +String decodeUtf8(List<int> bytes,
|
| + [int offset = 0,
|
| + int length,
|
| int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
|
| return new String.fromCharCodes(
|
| (new Utf8Decoder(bytes, offset, length, replacementCodepoint))
|
| - .decodeRest());
|
| + .decodeRest());
|
| }
|
|
|
| /**
|
| * Produce a sequence of UTF-8 encoded bytes from the provided string.
|
| */
|
| -List<int> encodeUtf8(String str) =>
|
| - codepointsToUtf8(stringToCodepoints(str));
|
| +List<int> encodeUtf8(String str) => codepointsToUtf8(stringToCodepoints(str));
|
|
|
| int _addToEncoding(int offset, int bytes, int value, List<int> buffer) {
|
| while (bytes > 0) {
|
| - buffer[offset + bytes] = _UTF8_SUBSEQUENT_BYTE_BASE |
|
| - (value & _UTF8_LO_SIX_BIT_MASK);
|
| + buffer[offset + bytes] =
|
| + _UTF8_SUBSEQUENT_BYTE_BASE | (value & _UTF8_LO_SIX_BIT_MASK);
|
| value = value >> 6;
|
| bytes--;
|
| }
|
| @@ -67,8 +75,7 @@ int _addToEncoding(int offset, int bytes, int value, List<int> buffer) {
|
| /**
|
| * Encode code points as UTF-8 code units.
|
| */
|
| -List<int> codepointsToUtf8(
|
| - List<int> codepoints, [int offset = 0, int length]) {
|
| +List<int> codepointsToUtf8(List<int> codepoints, [int offset = 0, int length]) {
|
| ListRange source = new ListRange(codepoints, offset, length);
|
|
|
| int encodedLength = 0;
|
| @@ -96,19 +103,19 @@ List<int> codepointsToUtf8(
|
| encoded[insertAt] = value;
|
| insertAt++;
|
| } else if (value <= _UTF8_TWO_BYTE_MAX) {
|
| - encoded[insertAt] = _UTF8_FIRST_BYTE_OF_TWO_BASE | (
|
| - _UTF8_FIRST_BYTE_OF_TWO_MASK &
|
| - _addToEncoding(insertAt, 1, value, encoded));
|
| + encoded[insertAt] = _UTF8_FIRST_BYTE_OF_TWO_BASE |
|
| + (_UTF8_FIRST_BYTE_OF_TWO_MASK &
|
| + _addToEncoding(insertAt, 1, value, encoded));
|
| insertAt += 2;
|
| } else if (value <= _UTF8_THREE_BYTE_MAX) {
|
| - encoded[insertAt] = _UTF8_FIRST_BYTE_OF_THREE_BASE | (
|
| - _UTF8_FIRST_BYTE_OF_THREE_MASK &
|
| - _addToEncoding(insertAt, 2, value, encoded));
|
| + encoded[insertAt] = _UTF8_FIRST_BYTE_OF_THREE_BASE |
|
| + (_UTF8_FIRST_BYTE_OF_THREE_MASK &
|
| + _addToEncoding(insertAt, 2, value, encoded));
|
| insertAt += 3;
|
| } else if (value <= UNICODE_VALID_RANGE_MAX) {
|
| - encoded[insertAt] = _UTF8_FIRST_BYTE_OF_FOUR_BASE | (
|
| - _UTF8_FIRST_BYTE_OF_FOUR_MASK &
|
| - _addToEncoding(insertAt, 3, value, encoded));
|
| + encoded[insertAt] = _UTF8_FIRST_BYTE_OF_FOUR_BASE |
|
| + (_UTF8_FIRST_BYTE_OF_FOUR_MASK &
|
| + _addToEncoding(insertAt, 3, value, encoded));
|
| insertAt += 4;
|
| }
|
| }
|
| @@ -117,11 +124,12 @@ List<int> codepointsToUtf8(
|
|
|
| // Because UTF-8 specifies byte order, we do not have to follow the pattern
|
| // used by UTF-16 & UTF-32 regarding byte order.
|
| -List<int> utf8ToCodepoints(
|
| - List<int> utf8EncodedBytes, [int offset = 0, int length,
|
| +List<int> utf8ToCodepoints(List<int> utf8EncodedBytes,
|
| + [int offset = 0,
|
| + int length,
|
| int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
|
| - return new Utf8Decoder(utf8EncodedBytes, offset, length,
|
| - replacementCodepoint).decodeRest();
|
| + return new Utf8Decoder(utf8EncodedBytes, offset, length, replacementCodepoint)
|
| + .decodeRest();
|
| }
|
|
|
| /**
|
| @@ -137,7 +145,9 @@ class IterableUtf8Decoder extends IterableBase<int> {
|
| final int length;
|
| final int replacementCodepoint;
|
|
|
| - IterableUtf8Decoder(this.bytes, [this.offset = 0, this.length = null,
|
| + IterableUtf8Decoder(this.bytes,
|
| + [this.offset = 0,
|
| + this.length = null,
|
| this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
|
|
|
| Utf8Decoder get iterator =>
|
| @@ -158,17 +168,16 @@ class Utf8Decoder implements Iterator<int> {
|
| final int replacementCodepoint;
|
| int _current = null;
|
|
|
| - Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length,
|
| - this.replacementCodepoint =
|
| - UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
|
| - utf8EncodedBytesIterator =
|
| - (new ListRange(utf8EncodedBytes, offset, length)).iterator;
|
| -
|
| + Utf8Decoder(List<int> utf8EncodedBytes,
|
| + [int offset = 0,
|
| + int length,
|
| + this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
|
| + : utf8EncodedBytesIterator =
|
| + (new ListRange(utf8EncodedBytes, offset, length)).iterator;
|
|
|
| - Utf8Decoder._fromListRangeIterator(ListRange source, [
|
| - this.replacementCodepoint =
|
| - UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
|
| - utf8EncodedBytesIterator = source.iterator;
|
| + Utf8Decoder._fromListRangeIterator(ListRange source,
|
| + [this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
|
| + : utf8EncodedBytesIterator = source.iterator;
|
|
|
| /** Decode the remaininder of the characters in this decoder
|
| * into a [List<int>].
|
| @@ -254,11 +263,10 @@ class Utf8Decoder implements Iterator<int> {
|
| }
|
| j++;
|
| }
|
| - bool validSequence = (j == additionalBytes && (
|
| - value < UNICODE_UTF16_RESERVED_LO ||
|
| - value > UNICODE_UTF16_RESERVED_HI));
|
| - bool nonOverlong =
|
| - (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||
|
| + bool validSequence = (j == additionalBytes &&
|
| + (value < UNICODE_UTF16_RESERVED_LO ||
|
| + value > UNICODE_UTF16_RESERVED_HI));
|
| + bool nonOverlong = (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||
|
| (additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) ||
|
| (additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX);
|
| bool inRange = value <= UNICODE_VALID_RANGE_MAX;
|
|
|