Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Unified Diff: mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart

Issue 1346773002: Stop running pub get at gclient sync time and fix build bugs (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart
diff --git a/mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart b/mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart
new file mode 100644
index 0000000000000000000000000000000000000000..ff1b1ed81e6079b529d85649071ffa7c0a838541
--- /dev/null
+++ b/mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart
@@ -0,0 +1,276 @@
+// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+part of utf;
+
+const int _UTF8_ONE_BYTE_MAX = 0x7f; // largest code point encoded in 1 byte
+const int _UTF8_TWO_BYTE_MAX = 0x7ff; // largest code point encoded in 2 bytes
+const int _UTF8_THREE_BYTE_MAX = 0xffff; // largest code point encoded in 3 bytes
+
+const int _UTF8_LO_SIX_BIT_MASK = 0x3f; // payload bits of a continuation byte
+
+const int _UTF8_FIRST_BYTE_OF_TWO_BASE = 0xc0; // lead-byte tag, 2-byte form
+const int _UTF8_FIRST_BYTE_OF_THREE_BASE = 0xe0; // lead-byte tag, 3-byte form
+const int _UTF8_FIRST_BYTE_OF_FOUR_BASE = 0xf0; // lead-byte tag, 4-byte form
+const int _UTF8_FIRST_BYTE_OF_FIVE_BASE = 0xf8; // 5-byte lead; decoder rejects
+const int _UTF8_FIRST_BYTE_OF_SIX_BASE = 0xfc; // 6-byte lead; decoder rejects
+
+const int _UTF8_FIRST_BYTE_OF_TWO_MASK = 0x1f; // payload bits, 2-byte lead
+const int _UTF8_FIRST_BYTE_OF_THREE_MASK = 0xf; // payload bits, 3-byte lead
+const int _UTF8_FIRST_BYTE_OF_FOUR_MASK = 0x7; // payload bits, 4-byte lead
+
+const int _UTF8_FIRST_BYTE_BOUND_EXCL = 0xfe; // lead bytes must be below this
+const int _UTF8_SUBSEQUENT_BYTE_BASE = 0x80; // tag bits of a continuation byte
+
+/**
+ * Decodes the UTF-8 [bytes] lazily as an iterable, so the consumer converts
+ * only as much of the input as it needs. Set [replacementCodepoint] to null to
+ * throw an ArgumentError rather than replace the bad value.
+ */
+IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes, [int offset = 0,
+ int length,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+ return new IterableUtf8Decoder(bytes, offset, length, replacementCodepoint);
+}
+
+/**
+ * Produce a String from a List of UTF-8 encoded bytes. The parameters
+ * can set an offset into a list of bytes (as int), limit the length of the
+ * values to be decoded, and override the default Unicode replacement character.
+ * Set [replacementCodepoint] to null to throw an ArgumentError
+ * rather than replace the bad value.
+ */
+String decodeUtf8(List<int> bytes, [int offset = 0, int length,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+ return new String.fromCharCodes(
+ (new Utf8Decoder(bytes, offset, length, replacementCodepoint))
+ .decodeRest()); // decodes the whole range eagerly into code points
+}
+
+/**
+ * Produce a sequence of UTF-8 encoded bytes from the provided string [str].
+ */
+List<int> encodeUtf8(String str) =>
+ codepointsToUtf8(stringToCodepoints(str)); // string -> code points -> bytes
+
+int _addToEncoding(int offset, int bytes, int value, List<int> buffer) {
+ while (bytes > 0) { // fills buffer[offset + bytes] down to buffer[offset + 1]
+ buffer[offset + bytes] = _UTF8_SUBSEQUENT_BYTE_BASE | // continuation tag
+ (value & _UTF8_LO_SIX_BIT_MASK); // low six bits of what is left
+ value = value >> 6; // drop the six bits just stored
+ bytes--;
+ }
+ return value; // leftover high bits; callers put them in buffer[offset]
+}
+
+/**
+ * Encode code points as UTF-8 code units, sizing the result in a first pass.
+ */
+List<int> codepointsToUtf8(
+ List<int> codepoints, [int offset = 0, int length]) {
+ ListRange source = new ListRange(codepoints, offset, length);
+
+ int encodedLength = 0;
+ for (int value in source) { // pass 1: count the bytes needed
+ if (value < 0 || value > UNICODE_VALID_RANGE_MAX) {
+ encodedLength += 3; // out of range: replaced by a 3-byte sequence below
+ } else if (value <= _UTF8_ONE_BYTE_MAX) {
+ encodedLength++;
+ } else if (value <= _UTF8_TWO_BYTE_MAX) {
+ encodedLength += 2;
+ } else if (value <= _UTF8_THREE_BYTE_MAX) {
+ encodedLength += 3;
+ } else if (value <= UNICODE_VALID_RANGE_MAX) { // always true at this point
+ encodedLength += 4;
+ }
+ }
+
+ List<int> encoded = new List<int>(encodedLength);
+ int insertAt = 0;
+ for (int value in source) { // pass 2: write the bytes
+ if (value < 0 || value > UNICODE_VALID_RANGE_MAX) {
+ encoded.setRange(insertAt, insertAt + 3, [0xef, 0xbf, 0xbd]); // U+FFFD
+ insertAt += 3;
+ } else if (value <= _UTF8_ONE_BYTE_MAX) {
+ encoded[insertAt] = value;
+ insertAt++;
+ } else if (value <= _UTF8_TWO_BYTE_MAX) {
+ encoded[insertAt] = _UTF8_FIRST_BYTE_OF_TWO_BASE | (
+ _UTF8_FIRST_BYTE_OF_TWO_MASK &
+ _addToEncoding(insertAt, 1, value, encoded)); // writes 1 trailing byte
+ insertAt += 2;
+ } else if (value <= _UTF8_THREE_BYTE_MAX) {
+ encoded[insertAt] = _UTF8_FIRST_BYTE_OF_THREE_BASE | (
+ _UTF8_FIRST_BYTE_OF_THREE_MASK &
+ _addToEncoding(insertAt, 2, value, encoded)); // writes 2 trailing bytes
+ insertAt += 3;
+ } else if (value <= UNICODE_VALID_RANGE_MAX) { // always true at this point
+ encoded[insertAt] = _UTF8_FIRST_BYTE_OF_FOUR_BASE | (
+ _UTF8_FIRST_BYTE_OF_FOUR_MASK &
+ _addToEncoding(insertAt, 3, value, encoded)); // writes 3 trailing bytes
+ insertAt += 4;
+ }
+ }
+ return encoded;
+}
+
+// Decodes UTF-8 bytes into a list of code points. Because UTF-8 specifies byte
+// order, this does not follow the UTF-16 & UTF-32 pattern regarding byte order.
+List<int> utf8ToCodepoints(
+ List<int> utf8EncodedBytes, [int offset = 0, int length,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+ return new Utf8Decoder(utf8EncodedBytes, offset, length,
+ replacementCodepoint).decodeRest();
+}
+
+/**
+ * Return type of [decodeUtf8AsIterable] and variants. The Iterable type
+ * provides an iterator on demand and the iterator will only translate bytes
+ * as requested by the user of the iterator. (Note: results are not cached.)
+ */
+// TODO(floitsch): Consider removing the extends and switching to implements
+// since that's cheaper to allocate.
+class IterableUtf8Decoder extends IterableBase<int> {
+ final List<int> bytes;
+ final int offset;
+ final int length; // defaults to null in the constructor below
+ final int replacementCodepoint; // null makes the decoder throw on bad input
+
+ IterableUtf8Decoder(this.bytes, [this.offset = 0, this.length = null,
+ this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
+
+ Utf8Decoder get iterator => // builds a new decoder on every access
+ new Utf8Decoder(bytes, offset, length, replacementCodepoint);
+}
+
+/**
+ * Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The
+ * parameters can set an offset into a list of bytes (as int), limit the length
+ * of the values to be decoded, and override the default Unicode replacement
+ * character. Set [replacementCodepoint] to null to throw an
+ * ArgumentError rather than replace the bad value. To iterate in a for-loop,
+ * wrap the bytes in an [IterableUtf8Decoder], whose iterator is this class.
+ */
+class Utf8Decoder implements Iterator<int> {
+ // TODO(kevmoo): should this field be private?
+ final ListRangeIterator utf8EncodedBytesIterator;
+ final int replacementCodepoint;
+ int _current = null;
+
+ Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length,
+ this.replacementCodepoint =
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
+ utf8EncodedBytesIterator =
+ (new ListRange(utf8EncodedBytes, offset, length)).iterator;
+
+
+ Utf8Decoder._fromListRangeIterator(ListRange source, [
+ this.replacementCodepoint =
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
+ utf8EncodedBytesIterator = source.iterator;
+
+ /** Decode the remainder of the characters in this decoder
+ * into a [List<int>].
+ */
+ List<int> decodeRest() {
+ List<int> codepoints = new List<int>(utf8EncodedBytesIterator.remaining);
+ int i = 0;
+ while (moveNext()) {
+ codepoints[i++] = current;
+ }
+ if (i == codepoints.length) {
+ return codepoints;
+ } else { // fewer code points than allocated: copy into an exact-size list
+ List<int> truncCodepoints = new List<int>(i);
+ truncCodepoints.setRange(0, i, codepoints);
+ return truncCodepoints;
+ }
+ }
+
+ int get current => _current;
+
+ bool moveNext() {
+ _current = null;
+
+ if (!utf8EncodedBytesIterator.moveNext()) return false; // input exhausted
+
+ int value = utf8EncodedBytesIterator.current;
+ int additionalBytes = 0;
+
+ if (value < 0) { // negative input value: replace or throw
+ if (replacementCodepoint != null) {
+ _current = replacementCodepoint;
+ return true;
+ } else {
+ throw new ArgumentError(
+ "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
+ }
+ } else if (value <= _UTF8_ONE_BYTE_MAX) { // one-byte form: value is the result
+ _current = value;
+ return true;
+ } else if (value < _UTF8_FIRST_BYTE_OF_TWO_BASE) { // 0x80..0xbf without a lead
+ if (replacementCodepoint != null) {
+ _current = replacementCodepoint;
+ return true;
+ } else {
+ throw new ArgumentError(
+ "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
+ }
+ } else if (value < _UTF8_FIRST_BYTE_OF_THREE_BASE) {
+ value -= _UTF8_FIRST_BYTE_OF_TWO_BASE; // strip the tag, keep payload bits
+ additionalBytes = 1;
+ } else if (value < _UTF8_FIRST_BYTE_OF_FOUR_BASE) {
+ value -= _UTF8_FIRST_BYTE_OF_THREE_BASE;
+ additionalBytes = 2;
+ } else if (value < _UTF8_FIRST_BYTE_OF_FIVE_BASE) {
+ value -= _UTF8_FIRST_BYTE_OF_FOUR_BASE;
+ additionalBytes = 3;
+ } else if (value < _UTF8_FIRST_BYTE_OF_SIX_BASE) {
+ value -= _UTF8_FIRST_BYTE_OF_FIVE_BASE;
+ additionalBytes = 4; // consumed as a unit, then rejected by nonOverlong
+ } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) {
+ value -= _UTF8_FIRST_BYTE_OF_SIX_BASE;
+ additionalBytes = 5; // consumed as a unit, then rejected by nonOverlong
+ } else if (replacementCodepoint != null) { // 0xfe and above: not a lead byte
+ _current = replacementCodepoint;
+ return true;
+ } else {
+ throw new ArgumentError(
+ "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
+ }
+ int j = 0; // number of continuation bytes accepted so far
+ while (j < additionalBytes && utf8EncodedBytesIterator.moveNext()) {
+ int nextValue = utf8EncodedBytesIterator.current;
+ if (nextValue > _UTF8_ONE_BYTE_MAX &&
+ nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE) {
+ value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK));
+ } else {
+ // if sequence-starting code unit, reposition cursor to start here
+ if (nextValue >= _UTF8_FIRST_BYTE_OF_TWO_BASE) {
+ utf8EncodedBytesIterator.backup();
+ }
+ break; // NOTE(review): bytes below 0xc0 stay consumed here — intended?
+ }
+ j++;
+ }
+ bool validSequence = (j == additionalBytes && (
+ value < UNICODE_UTF16_RESERVED_LO ||
+ value > UNICODE_UTF16_RESERVED_HI)); // complete, outside reserved range
+ bool nonOverlong =
+ (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||
+ (additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) ||
+ (additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX); // false for 4 and 5
+ bool inRange = value <= UNICODE_VALID_RANGE_MAX;
+ if (validSequence && nonOverlong && inRange) {
+ _current = value;
+ return true;
+ } else if (replacementCodepoint != null) {
+ _current = replacementCodepoint;
+ return true;
+ } else {
+ throw new ArgumentError( // presumably reports the lead byte's index — confirm
+ "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}");
+ }
+ }
+}
« no previous file with comments | « mojo/public/dart/third_party/utf/lib/src/utf/utf32.dart ('k') | mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698