| Index: pkg/utf/lib/utf.dart
|
| diff --git a/pkg/utf/lib/utf.dart b/pkg/utf/lib/utf.dart
|
| index 002287a8ca3498e9571d5781f063101da373759e..102899065f787b42baf69bf499e6b5be36d54b8a 100644
|
| --- a/pkg/utf/lib/utf.dart
|
| +++ b/pkg/utf/lib/utf.dart
|
| @@ -11,261 +11,9 @@ library utf;
|
| import "dart:async";
|
| import "dart:collection";
|
|
|
| +part "constants.dart";
|
| +part "list_range.dart";
|
| part "utf_stream.dart";
|
| part "utf8.dart";
|
| part "utf16.dart";
|
| part "utf32.dart";
|
| -
|
| -// TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).
|
| -/**
|
| - * Provide a list of Unicode codepoints for a given string.
|
| - */
|
| -List<int> stringToCodepoints(String str) {
|
| - // Note: str.codeUnits gives us 16-bit code units on all Dart implementations.
|
| - // So we need to convert.
|
| - return _utf16CodeUnitsToCodepoints(str.codeUnits);
|
| -}
|
| -
|
| -/**
|
| - * Generate a string from the provided Unicode codepoints.
|
| - *
|
| - * *Deprecated* Use [String.fromCharCodes] instead.
|
| - */
|
| -String codepointsToString(List<int> codepoints) {
|
| - return new String.fromCharCodes(codepoints);
|
| -}
|
| -
|
| -/**
|
| - * Invalid codepoints or encodings may be substituted with the value U+fffd.
|
| - */
|
| -const int UNICODE_REPLACEMENT_CHARACTER_CODEPOINT = 0xfffd;
|
| -const int UNICODE_BOM = 0xfeff;
|
| -const int UNICODE_UTF_BOM_LO = 0xff;
|
| -const int UNICODE_UTF_BOM_HI = 0xfe;
|
| -
|
| -const int UNICODE_BYTE_ZERO_MASK = 0xff;
|
| -const int UNICODE_BYTE_ONE_MASK = 0xff00;
|
| -const int UNICODE_VALID_RANGE_MAX = 0x10ffff;
|
| -const int UNICODE_PLANE_ONE_MAX = 0xffff;
|
| -const int UNICODE_UTF16_RESERVED_LO = 0xd800;
|
| -const int UNICODE_UTF16_RESERVED_HI = 0xdfff;
|
| -const int UNICODE_UTF16_OFFSET = 0x10000;
|
| -const int UNICODE_UTF16_SURROGATE_UNIT_0_BASE = 0xd800;
|
| -const int UNICODE_UTF16_SURROGATE_UNIT_1_BASE = 0xdc00;
|
| -const int UNICODE_UTF16_HI_MASK = 0xffc00;
|
| -const int UNICODE_UTF16_LO_MASK = 0x3ff;
|
| -
|
| -/**
|
| - * Encode code points as UTF16 code units.
|
| - */
|
| -List<int> _codepointsToUtf16CodeUnits(
|
| - List<int> codepoints,
|
| - [int offset = 0,
|
| - int length,
|
| - int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
|
| -
|
| - _ListRange listRange = new _ListRange(codepoints, offset, length);
|
| - int encodedLength = 0;
|
| - for (int value in listRange) {
|
| - if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
|
| - (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
|
| - encodedLength++;
|
| - } else if (value > UNICODE_PLANE_ONE_MAX &&
|
| - value <= UNICODE_VALID_RANGE_MAX) {
|
| - encodedLength += 2;
|
| - } else {
|
| - encodedLength++;
|
| - }
|
| - }
|
| -
|
| - List<int> codeUnitsBuffer = new List<int>(encodedLength);
|
| - int j = 0;
|
| - for (int value in listRange) {
|
| - if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
|
| - (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
|
| - codeUnitsBuffer[j++] = value;
|
| - } else if (value > UNICODE_PLANE_ONE_MAX &&
|
| - value <= UNICODE_VALID_RANGE_MAX) {
|
| - int base = value - UNICODE_UTF16_OFFSET;
|
| - codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
|
| - ((base & UNICODE_UTF16_HI_MASK) >> 10);
|
| - codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE +
|
| - (base & UNICODE_UTF16_LO_MASK);
|
| - } else if (replacementCodepoint != null) {
|
| - codeUnitsBuffer[j++] = replacementCodepoint;
|
| - } else {
|
| - throw new ArgumentError("Invalid encoding");
|
| - }
|
| - }
|
| - return codeUnitsBuffer;
|
| -}
|
| -
|
| -/**
|
| - * Decodes the utf16 codeunits to codepoints.
|
| - */
|
| -List<int> _utf16CodeUnitsToCodepoints(
|
| - List<int> utf16CodeUnits, [int offset = 0, int length,
|
| - int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
|
| - _ListRangeIterator source =
|
| - (new _ListRange(utf16CodeUnits, offset, length)).iterator;
|
| - Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder
|
| - .fromListRangeIterator(source, replacementCodepoint);
|
| - List<int> codepoints = new List<int>(source.remaining);
|
| - int i = 0;
|
| - while (decoder.moveNext()) {
|
| - codepoints[i++] = decoder.current;
|
| - }
|
| - if (i == codepoints.length) {
|
| - return codepoints;
|
| - } else {
|
| - List<int> codepointTrunc = new List<int>(i);
|
| - codepointTrunc.setRange(0, i, codepoints);
|
| - return codepointTrunc;
|
| - }
|
| -}
|
| -
|
| -/**
|
| - * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units.
|
| - * The parameters can override the default Unicode replacement character. Set
|
| - * the replacementCharacter to null to throw an ArgumentError
|
| - * rather than replace the bad value.
|
| - */
|
| -class Utf16CodeUnitDecoder implements Iterator<int> {
|
| - final _ListRangeIterator utf16CodeUnitIterator;
|
| - final int replacementCodepoint;
|
| - int _current = null;
|
| -
|
| - Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
|
| - int this.replacementCodepoint =
|
| - UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
|
| - utf16CodeUnitIterator =
|
| - (new _ListRange(utf16CodeUnits, offset, length)).iterator;
|
| -
|
| - Utf16CodeUnitDecoder.fromListRangeIterator(
|
| - _ListRangeIterator this.utf16CodeUnitIterator,
|
| - int this.replacementCodepoint);
|
| -
|
| - Iterator<int> get iterator => this;
|
| -
|
| - int get current => _current;
|
| -
|
| - bool moveNext() {
|
| - _current = null;
|
| - if (!utf16CodeUnitIterator.moveNext()) return false;
|
| -
|
| - int value = utf16CodeUnitIterator.current;
|
| - if (value < 0) {
|
| - if (replacementCodepoint != null) {
|
| - _current = replacementCodepoint;
|
| - } else {
|
| - throw new ArgumentError(
|
| - "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
|
| - }
|
| - } else if (value < UNICODE_UTF16_RESERVED_LO ||
|
| - (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
|
| - // transfer directly
|
| - _current = value;
|
| - } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
|
| - utf16CodeUnitIterator.moveNext()) {
|
| - // merge surrogate pair
|
| - int nextValue = utf16CodeUnitIterator.current;
|
| - if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
|
| - nextValue <= UNICODE_UTF16_RESERVED_HI) {
|
| - value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10;
|
| - value += UNICODE_UTF16_OFFSET +
|
| - (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
|
| - _current = value;
|
| - } else {
|
| - if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE &&
|
| - nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) {
|
| - utf16CodeUnitIterator.backup();
|
| - }
|
| - if (replacementCodepoint != null) {
|
| - _current = replacementCodepoint;
|
| - } else {
|
| - throw new ArgumentError(
|
| - "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
|
| - }
|
| - }
|
| - } else if (replacementCodepoint != null) {
|
| - _current = replacementCodepoint;
|
| - } else {
|
| - throw new ArgumentError(
|
| - "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
|
| - }
|
| - return true;
|
| - }
|
| -}
|
| -
|
| -/**
|
| - * _ListRange in an internal type used to create a lightweight Interable on a
|
| - * range within a source list. DO NOT MODIFY the underlying list while
|
| - * iterating over it. The results of doing so are undefined.
|
| - */
|
| -// TODO(floitsch): Consider removing the extend and switch to implements since
|
| -// that's cheaper to allocate.
|
| -class _ListRange extends IterableBase {
|
| - final List _source;
|
| - final int _offset;
|
| - final int _length;
|
| -
|
| - _ListRange(source, [offset = 0, length]) :
|
| - this._source = source,
|
| - this._offset = offset,
|
| - this._length = (length == null ? source.length - offset : length) {
|
| - if (_offset < 0 || _offset > _source.length) {
|
| - throw new RangeError.value(_offset);
|
| - }
|
| - if (_length != null && (_length < 0)) {
|
| - throw new RangeError.value(_length);
|
| - }
|
| - if (_length + _offset > _source.length) {
|
| - throw new RangeError.value(_length + _offset);
|
| - }
|
| - }
|
| -
|
| - _ListRangeIterator get iterator =>
|
| - new _ListRangeIteratorImpl(_source, _offset, _offset + _length);
|
| -
|
| - int get length => _length;
|
| -}
|
| -
|
| -/**
|
| - * The _ListRangeIterator provides more capabilities than a standard iterator,
|
| - * including the ability to get the current position, count remaining items,
|
| - * and move forward/backward within the iterator.
|
| - */
|
| -abstract class _ListRangeIterator implements Iterator<int> {
|
| - bool moveNext();
|
| - int get current;
|
| - int get position;
|
| - void backup([by]);
|
| - int get remaining;
|
| - void skip([count]);
|
| -}
|
| -
|
| -class _ListRangeIteratorImpl implements _ListRangeIterator {
|
| - final List<int> _source;
|
| - int _offset;
|
| - final int _end;
|
| -
|
| - _ListRangeIteratorImpl(this._source, int offset, this._end)
|
| - : _offset = offset - 1;
|
| -
|
| - int get current => _source[_offset];
|
| -
|
| - bool moveNext() => ++_offset < _end;
|
| -
|
| - int get position => _offset;
|
| -
|
| - void backup([int by = 1]) {
|
| - _offset -= by;
|
| - }
|
| -
|
| - int get remaining => _end - _offset - 1;
|
| -
|
| - void skip([int count = 1]) {
|
| - _offset += count;
|
| - }
|
| -}
|
| -
|
|
|