mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart - Issue 1346773002: Stop running pub get at gclient sync time and fix build bugs

Unified Diff: mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart

Issue 1346773002: Stop running pub get at gclient sync time and fix build bugs (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « mojo/public/dart/third_party/utf/lib/src/utf/utf32.dart ('k') | mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart

diff --git a/mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart b/mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart

new file mode 100644

index 0000000000000000000000000000000000000000..ff1b1ed81e6079b529d85649071ffa7c0a838541

--- /dev/null

+++ b/mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart

@@ -0,0 +1,276 @@

+// BSD-style license that can be found in the LICENSE file.

+part of utf;

+const int _UTF8_ONE_BYTE_MAX = 0x7f; // Largest code point encoded as a single byte.

+const int _UTF8_TWO_BYTE_MAX = 0x7ff; // Largest code point encoded in two bytes.

+const int _UTF8_THREE_BYTE_MAX = 0xffff; // Largest code point encoded in three bytes.

+const int _UTF8_LO_SIX_BIT_MASK = 0x3f; // Selects the six payload bits carried by a continuation byte.

+const int _UTF8_FIRST_BYTE_OF_TWO_BASE = 0xc0; // Marker bits ORed into the lead byte of a two-byte sequence.

+const int _UTF8_FIRST_BYTE_OF_THREE_BASE = 0xe0; // Marker bits ORed into the lead byte of a three-byte sequence.

+const int _UTF8_FIRST_BYTE_OF_FOUR_BASE = 0xf0; // Marker bits ORed into the lead byte of a four-byte sequence.

+const int _UTF8_FIRST_BYTE_OF_FIVE_BASE = 0xf8; // Decoder only: start of the five-byte lead range; such sequences are never accepted.

+const int _UTF8_FIRST_BYTE_OF_SIX_BASE = 0xfc; // Decoder only: start of the six-byte lead range; such sequences are never accepted.

+const int _UTF8_FIRST_BYTE_OF_TWO_MASK = 0x1f; // Payload bits kept in the lead byte of a two-byte sequence.

+const int _UTF8_FIRST_BYTE_OF_THREE_MASK = 0xf; // Payload bits kept in the lead byte of a three-byte sequence.

+const int _UTF8_FIRST_BYTE_OF_FOUR_MASK = 0x7; // Payload bits kept in the lead byte of a four-byte sequence.

+const int _UTF8_FIRST_BYTE_BOUND_EXCL = 0xfe; // Bytes at or above this value are rejected as lead bytes.

+const int _UTF8_SUBSEQUENT_BYTE_BASE = 0x80; // Marker bits ORed into every continuation byte.

+/**
+ * Wraps [bytes] in a lazily-decoding iterable of Unicode code points, so the
+ * caller only pays for the part of the input it actually iterates over.
+ *
+ * [offset] and [length] are forwarded unchanged to [IterableUtf8Decoder].
+ * Pass null as [replacementCodepoint] to get an ArgumentError on malformed
+ * input instead of a substituted code point.
+ */
+IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes,
+    [int offset = 0,
+    int length,
+    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) =>
+    new IterableUtf8Decoder(bytes, offset, length, replacementCodepoint);

+/**
+ * Decodes UTF-8 encoded [bytes] into a String.
+ *
+ * [offset] and [length] are handed to [Utf8Decoder] to select the part of
+ * [bytes] that is decoded. Malformed input is replaced by
+ * [replacementCodepoint]; pass null for [replacementCodepoint] to have an
+ * ArgumentError thrown instead.
+ */
+String decodeUtf8(List<int> bytes,
+    [int offset = 0,
+    int length,
+    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+  Utf8Decoder decoder =
+      new Utf8Decoder(bytes, offset, length, replacementCodepoint);
+  List<int> codepoints = decoder.decodeRest();
+  return new String.fromCharCodes(codepoints);
+}

+/**
+ * Encodes [str] as a list of UTF-8 bytes.
+ *
+ * The string is first turned into code points with [stringToCodepoints] and
+ * the result is then encoded by [codepointsToUtf8].
+ */
+List<int> encodeUtf8(String str) {
+  List<int> codepoints = stringToCodepoints(str);
+  return codepointsToUtf8(codepoints);
+}

+/**
+ * Writes the [bytes] continuation bytes of a multi-byte sequence into
+ * [buffer] at positions `offset + bytes` down to `offset + 1`, taking six
+ * bits of [value] at a time starting with the lowest ones.
+ *
+ * Returns the bits of [value] left over after those shifts; the caller
+ * stores them in the lead byte at `buffer[offset]`.
+ */
+int _addToEncoding(int offset, int bytes, int value, List<int> buffer) {
+  int remainingBits = value;
+  for (int slot = bytes; slot > 0; slot--) {
+    int payload = remainingBits & _UTF8_LO_SIX_BIT_MASK;
+    buffer[offset + slot] = _UTF8_SUBSEQUENT_BYTE_BASE | payload;
+    remainingBits = remainingBits >> 6;
+  }
+  return remainingBits;
+}

+/**
+ * Encodes a range of [codepoints] as UTF-8 bytes.
+ *
+ * [offset] and [length] are passed to [ListRange] to select the code points
+ * to encode. A value that is negative or greater than
+ * UNICODE_VALID_RANGE_MAX is emitted as the three bytes 0xEF 0xBF 0xBD.
+ * The result is a fixed-length list sized exactly for the encoded output.
+ */
+List<int> codepointsToUtf8(
+    List<int> codepoints, [int offset = 0, int length]) {
+  ListRange range = new ListRange(codepoints, offset, length);
+
+  // First pass: work out how many bytes the output needs.
+  int totalBytes = 0;
+  for (int cp in range) {
+    int width;
+    if (cp < 0 || cp > UNICODE_VALID_RANGE_MAX) {
+      width = 3; // Out-of-range values become the 3-byte replacement.
+    } else if (cp > _UTF8_THREE_BYTE_MAX) {
+      width = 4;
+    } else if (cp > _UTF8_TWO_BYTE_MAX) {
+      width = 3;
+    } else if (cp > _UTF8_ONE_BYTE_MAX) {
+      width = 2;
+    } else {
+      width = 1;
+    }
+    totalBytes += width;
+  }
+
+  // Second pass: fill the output. For multi-byte sequences the continuation
+  // bytes are written by _addToEncoding, which returns the bits that belong
+  // in the lead byte.
+  List<int> out = new List<int>(totalBytes);
+  int pos = 0;
+  for (int cp in range) {
+    if (cp < 0 || cp > UNICODE_VALID_RANGE_MAX) {
+      out[pos] = 0xef;
+      out[pos + 1] = 0xbf;
+      out[pos + 2] = 0xbd;
+      pos += 3;
+    } else if (cp > _UTF8_THREE_BYTE_MAX) {
+      int leadBits = _addToEncoding(pos, 3, cp, out);
+      out[pos] = _UTF8_FIRST_BYTE_OF_FOUR_BASE |
+          (_UTF8_FIRST_BYTE_OF_FOUR_MASK & leadBits);
+      pos += 4;
+    } else if (cp > _UTF8_TWO_BYTE_MAX) {
+      int leadBits = _addToEncoding(pos, 2, cp, out);
+      out[pos] = _UTF8_FIRST_BYTE_OF_THREE_BASE |
+          (_UTF8_FIRST_BYTE_OF_THREE_MASK & leadBits);
+      pos += 3;
+    } else if (cp > _UTF8_ONE_BYTE_MAX) {
+      int leadBits = _addToEncoding(pos, 1, cp, out);
+      out[pos] = _UTF8_FIRST_BYTE_OF_TWO_BASE |
+          (_UTF8_FIRST_BYTE_OF_TWO_MASK & leadBits);
+      pos += 2;
+    } else {
+      out[pos] = cp;
+      pos += 1;
+    }
+  }
+  return out;
+}

+// UTF-8 fixes its own byte order, so unlike the UTF-16 and UTF-32 helpers
+// there is no byte-order handling to do here.
+List<int> utf8ToCodepoints(List<int> utf8EncodedBytes,
+    [int offset = 0,
+    int length,
+    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+  Utf8Decoder decoder = new Utf8Decoder(
+      utf8EncodedBytes, offset, length, replacementCodepoint);
+  return decoder.decodeRest();
+}

+/**
+ * The iterable returned by [decodeUtf8AsIterable] and its variants.
+ *
+ * Every access to [iterator] builds a new [Utf8Decoder] over the same bytes,
+ * so decoding happens on demand as that iterator is advanced and nothing is
+ * cached between iterations.
+ */
+// TODO(floitsch): Consider removing the extend and switch to implements since
+// that's cheaper to allocate.
+class IterableUtf8Decoder extends IterableBase<int> {
+  /** The UTF-8 encoded input. */
+  final List<int> bytes;
+
+  /** Passed to [Utf8Decoder] as the start of the range to decode. */
+  final int offset;
+
+  /**
+   * Passed to [Utf8Decoder] as the length of the range to decode.
+   */
+  // NOTE(review): this field appears to shadow the `length` getter inherited
+  // from IterableBase, so `length` would report the constructor argument
+  // (possibly null) rather than the number of code points - confirm callers
+  // expect that.
+  final int length;
+
+  /** Code point substituted for malformed input; null means throw. */
+  final int replacementCodepoint;
+
+  IterableUtf8Decoder(this.bytes,
+      [this.offset = 0,
+      this.length,
+      this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
+
+  Utf8Decoder get iterator {
+    return new Utf8Decoder(bytes, offset, length, replacementCodepoint);
+  }
+}

+/**
+ * Iterator over the Unicode code points encoded by a range of UTF-8 bytes.
+ *
+ * The constructor's offset and length select the part of the byte list that
+ * is decoded. When a malformed sequence is met, [replacementCodepoint] is
+ * produced in its place; if [replacementCodepoint] is null an ArgumentError
+ * is thrown instead.
+ *
+ * A multi-byte sequence that is cut short by a byte which is not a
+ * continuation byte yields one replacement (or error), and the interrupting
+ * byte is then decoded as the start of the next sequence, so well-formed
+ * input following a truncated sequence is never swallowed.
+ */
+class Utf8Decoder implements Iterator<int> {
+  // TODO(kevmoo): should this field be private?
+  final ListRangeIterator utf8EncodedBytesIterator;
+
+  /** Code point substituted for malformed input; null means throw. */
+  final int replacementCodepoint;
+
+  // Code point produced by the latest successful moveNext(), else null.
+  int _current;
+
+  Utf8Decoder(List<int> utf8EncodedBytes,
+      [int offset = 0,
+      int length,
+      this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
+      : utf8EncodedBytesIterator =
+            (new ListRange(utf8EncodedBytes, offset, length)).iterator;
+
+  Utf8Decoder._fromListRangeIterator(ListRange source,
+      [this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
+      : utf8EncodedBytesIterator = source.iterator;
+
+  /**
+   * Decodes the remainder of the characters in this decoder into a
+   * [List<int>].
+   */
+  List<int> decodeRest() {
+    // Every code point consumes at least one byte, so the number of bytes
+    // left is an upper bound on the number of code points.
+    List<int> codepoints = new List<int>(utf8EncodedBytesIterator.remaining);
+    int i = 0;
+    while (moveNext()) {
+      codepoints[i++] = current;
+    }
+    if (i == codepoints.length) {
+      return codepoints;
+    } else {
+      // Multi-byte sequences were present: copy into an exactly-sized list.
+      List<int> truncCodepoints = new List<int>(i);
+      truncCodepoints.setRange(0, i, codepoints);
+      return truncCodepoints;
+    }
+  }
+
+  int get current => _current;
+
+  /**
+   * Decodes the next code point and stores it in [current].
+   *
+   * Returns false once the input is exhausted. Throws an ArgumentError on
+   * malformed input when [replacementCodepoint] is null.
+   */
+  bool moveNext() {
+    _current = null;
+
+    if (!utf8EncodedBytesIterator.moveNext()) return false;
+
+    int value = utf8EncodedBytesIterator.current;
+    int additionalBytes = 0;
+
+    // Classify the lead byte and strip its marker bits.
+    if (value < 0) {
+      return _replaceOrThrow();
+    } else if (value <= _UTF8_ONE_BYTE_MAX) {
+      _current = value;
+      return true;
+    } else if (value < _UTF8_FIRST_BYTE_OF_TWO_BASE) {
+      // A continuation byte where a lead byte was expected.
+      return _replaceOrThrow();
+    } else if (value < _UTF8_FIRST_BYTE_OF_THREE_BASE) {
+      value -= _UTF8_FIRST_BYTE_OF_TWO_BASE;
+      additionalBytes = 1;
+    } else if (value < _UTF8_FIRST_BYTE_OF_FOUR_BASE) {
+      value -= _UTF8_FIRST_BYTE_OF_THREE_BASE;
+      additionalBytes = 2;
+    } else if (value < _UTF8_FIRST_BYTE_OF_FIVE_BASE) {
+      value -= _UTF8_FIRST_BYTE_OF_FOUR_BASE;
+      additionalBytes = 3;
+    } else if (value < _UTF8_FIRST_BYTE_OF_SIX_BASE) {
+      // Five- and six-byte forms are consumed as a unit but always rejected
+      // below, because the overlong check has no case that accepts them.
+      value -= _UTF8_FIRST_BYTE_OF_FIVE_BASE;
+      additionalBytes = 4;
+    } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) {
+      value -= _UTF8_FIRST_BYTE_OF_SIX_BASE;
+      additionalBytes = 5;
+    } else {
+      return _replaceOrThrow();
+    }
+
+    // Fold in the continuation bytes; j counts how many were accepted.
+    int j = 0;
+    while (j < additionalBytes && utf8EncodedBytesIterator.moveNext()) {
+      int nextValue = utf8EncodedBytesIterator.current;
+      bool isContinuation = nextValue > _UTF8_ONE_BYTE_MAX &&
+          nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE;
+      if (!isContinuation) {
+        // The sequence was cut short. Whatever interrupted it (an ASCII
+        // byte, a new lead byte, or an out-of-range value) belongs to the
+        // next sequence, so hand it back instead of silently dropping it.
+        utf8EncodedBytesIterator.backup();
+        break;
+      }
+      value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK));
+      j++;
+    }
+
+    // Complete, and not a UTF-16 surrogate.
+    bool validSequence = (j == additionalBytes &&
+        (value < UNICODE_UTF16_RESERVED_LO ||
+            value > UNICODE_UTF16_RESERVED_HI));
+    // Encoded in the shortest possible form.
+    bool nonOverlong =
+        (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||
+        (additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) ||
+        (additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX);
+    bool inRange = value <= UNICODE_VALID_RANGE_MAX;
+    if (validSequence && nonOverlong && inRange) {
+      _current = value;
+      return true;
+    }
+    return _replaceOrThrow(j);
+  }
+
+  // Reports malformed input: yields [replacementCodepoint] as the current
+  // value, or throws if it is null. [consumed] is subtracted from the
+  // iterator position in the error message.
+  bool _replaceOrThrow([int consumed = 0]) {
+    if (replacementCodepoint != null) {
+      _current = replacementCodepoint;
+      return true;
+    }
+    throw new ArgumentError(
+        "Invalid UTF8 at ${utf8EncodedBytesIterator.position - consumed}");
+  }
+}