| Index: mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart
|
| diff --git a/mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart b/mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..0936616e728c4d31dcb453fd0b8f9f3708cdb610
|
| --- /dev/null
|
| +++ b/mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart
|
| @@ -0,0 +1,237 @@
|
| +// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
|
| +// for details. All rights reserved. Use of this source code is governed by a
|
| +// BSD-style license that can be found in the LICENSE file.
|
| +
|
| +part of utf;
|
| +
|
| +// TODO(floitsch): make this transformer reusable.
|
| +/**
|
| + * Base class for transformers that decode a stream of byte chunks into
|
| + * strings.
|
| + *
|
| + * The instance is both the [StreamTransformer] and the [EventSink] that
|
| + * receives the source events, so it can be attached to one output sink only.
|
| + * Subclasses supply the actual decoding in [_processBytes].
|
| + */
|
| +abstract class _StringDecoder
|
| +    implements StreamTransformer<List<int>, String>, EventSink<List<int>> {
|
| +  // Bytes of an incomplete sequence held back from the previous chunk, or
|
| +  // null when nothing is pending.
|
| +  List<int> _carry;
|
| +  // Code points decoded so far from the chunk being handled by [add].
|
| +  List<int> _buffer;
|
| +  // Code point substituted for invalid input. When null, invalid input is
|
| +  // reported as an [ArgumentError] instead.
|
| +  int _replacementChar;
|
| +
|
| +  // Sink of the output stream; assigned by the callback that [bind] hands to
|
| +  // [Stream.eventTransformed].
|
| +  EventSink<String> _outSink;
|
| +
|
| +  _StringDecoder(int this._replacementChar);
|
| +
|
| +  /**
|
| +   * Binds this decoder to [stream] and returns the stream of decoded strings.
|
| +   *
|
| +   * The sink-mapping callback throws a [StateError] if this decoder has
|
| +   * already been attached to an output sink.
|
| +   */
|
| +  Stream<String> bind(Stream<List<int>> stream) {
|
| +    return new Stream.eventTransformed(
|
| +        stream,
|
| +        (EventSink<String> sink) {
|
| +          if (_outSink != null) {
|
| +            throw new StateError("String decoder already used");
|
| +          }
|
| +          _outSink = sink;
|
| +          return this;
|
| +        });
|
| +  }
|
| +
|
| +  /**
|
| +   * Decodes [bytes], preceded by any bytes carried over from the previous
|
| +   * chunk, and emits the decoded characters as one string.
|
| +   *
|
| +   * A trailing incomplete sequence is kept in [_carry] for the next call.
|
| +   * Anything thrown while decoding is forwarded to the output sink.
|
| +   */
|
| +  void add(List<int> bytes) {
|
| +    try {
|
| +      _buffer = <int>[];
|
| +      List<int> carry = _carry;
|
| +      _carry = null;
|
| +      int pos = 0;
|
| +      int available = bytes.length;
|
| +      // If we have carry-over data, start from negative index, indicating carry
|
| +      // index.
|
| +      int goodChars = 0;
|
| +      if (carry != null) pos = -carry.length;
|
| +      while (pos < available) {
|
| +        int currentPos = pos;
|
| +        // Reads the next input byte, first from the carry (negative
|
| +        // positions), then from [bytes]; returns null once both are used up.
|
| +        int getNext() {
|
| +          if (pos < 0) {
|
| +            return carry[pos++ + carry.length];
|
| +          } else if (pos < available) {
|
| +            return bytes[pos++];
|
| +          }
|
| +          return null;
|
| +        }
|
| +        int consumed = _processBytes(getNext);
|
| +        if (consumed > 0) {
|
| +          goodChars = _buffer.length;
|
| +        } else if (consumed == 0) {
|
| +          // Input ended inside a sequence: discard its partial output and
|
| +          // hold the sequence's bytes back for the next chunk.
|
| +          _buffer.length = goodChars;
|
| +          if (currentPos < 0) {
|
| +            // The sequence started in the carry, so keep the old carry plus
|
| +            // the whole new chunk.
|
| +            _carry = [];
|
| +            _carry.addAll(carry);
|
| +            _carry.addAll(bytes);
|
| +          } else {
|
| +            _carry = bytes.sublist(currentPos);
|
| +          }
|
| +          break;
|
| +        } else {
|
| +          // Invalid byte at position pos - 1
|
| +          _buffer.length = goodChars;
|
| +          _addChar(-1);
|
| +          goodChars = _buffer.length;
|
| +        }
|
| +      }
|
| +      if (_buffer.length > 0) {
|
| +        // Every branch of the loop leaves the buffer trimmed to 'goodChars',
|
| +        // so it can be emitted as is.
|
| +        _outSink.add(new String.fromCharCodes(_buffer));
|
| +      }
|
| +      _buffer = null;
|
| +    } catch (e, stackTrace) {
|
| +      _outSink.addError(e, stackTrace);
|
| +    }
|
| +  }
|
| +
|
| +  /** Forwards [error] and [stackTrace] unchanged to the output sink. */
|
| +  void addError(error, [StackTrace stackTrace]) {
|
| +    _outSink.addError(error, stackTrace);
|
| +  }
|
| +
|
| +  /**
|
| +   * Flushes pending input and closes the output sink.
|
| +   *
|
| +   * Each byte of an incomplete trailing sequence is emitted as
|
| +   * [_replacementChar]. Without a replacement character an [ArgumentError]
|
| +   * is added to the output sink instead. The sink is closed in both cases.
|
| +   */
|
| +  void close() {
|
| +    if (_carry != null) {
|
| +      if (_replacementChar != null) {
|
| +        _outSink.add(new String.fromCharCodes(
|
| +            new List.filled(_carry.length, _replacementChar)));
|
| +      } else {
|
| +        // Report through the sink, as [add] does, rather than throwing, so
|
| +        // that the output sink is still closed below.
|
| +        _outSink.addError(new ArgumentError('Invalid codepoint'));
|
| +      }
|
| +    }
|
| +    _outSink.close();
|
| +  }
|
| +
|
| +  /**
|
| +   * Decodes one character, pulling its bytes from [getNext] (which returns
|
| +   * null when the input is exhausted) and storing it with [_addChar].
|
| +   *
|
| +   * [add] interprets the result as follows: a positive value means a
|
| +   * character was decoded, 0 means the input ended inside a sequence, and a
|
| +   * negative value means the last byte read was invalid.
|
| +   */
|
| +  int _processBytes(int getNext());
|
| +
|
| +  /**
|
| +   * Appends [char] to the output buffer of the current chunk.
|
| +   *
|
| +   * A negative value, a surrogate (0xD800..0xDFFF) or a value above 0x10FFFF
|
| +   * is replaced by [_replacementChar]; if that is null an [ArgumentError] is
|
| +   * thrown instead.
|
| +   */
|
| +  void _addChar(int char) {
|
| +    void error() {
|
| +      if (_replacementChar != null) {
|
| +        char = _replacementChar;
|
| +      } else {
|
| +        throw new ArgumentError('Invalid codepoint');
|
| +      }
|
| +    }
|
| +    if (char < 0) error();
|
| +    if (char >= 0xD800 && char <= 0xDFFF) error();
|
| +    if (char > 0x10FFFF) error();
|
| +    _buffer.add(char);
|
| +  }
|
| +}
|
| +
|
| +/**
|
| + * A [StreamTransformer] that turns chunks of UTF-8 encoded bytes into
|
| + * strings.
|
| + */
|
| +class Utf8DecoderTransformer extends _StringDecoder {
|
| +  Utf8DecoderTransformer(
|
| +      [int replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
|
| +      : super(replacementChar);
|
| +
|
| +  // Decodes a single UTF-8 sequence pulled from [getNext]. Returns the number
|
| +  // of bytes used, 0 when the input ended inside the sequence, and -1 when an
|
| +  // invalid byte was read.
|
| +  int _processBytes(int getNext()) {
|
| +    int codePoint = getNext();
|
| +    // Values that do not fit into eight bits are not bytes.
|
| +    if (codePoint != (codePoint & 0xFF)) return -1;
|
| +
|
| +    // High bit clear: the byte is the whole character.
|
| +    if ((codePoint & 0x80) != 0x80) {
|
| +      _addChar(codePoint);
|
| +      return 1;
|
| +    }
|
| +
|
| +    // The lead byte fixes the payload bits it contributes, the number of
|
| +    // trailing bytes, and the smallest value allowed for that length.
|
| +    int trailing;
|
| +    int lowerBound;
|
| +    if ((codePoint & 0xe0) == 0xc0) {
|
| +      // 110xxxxx
|
| +      trailing = 1;
|
| +      lowerBound = 0x80;
|
| +      codePoint &= 0x1F;
|
| +    } else if ((codePoint & 0xf0) == 0xe0) {
|
| +      // 1110xxxx
|
| +      trailing = 2;
|
| +      lowerBound = 0x800;
|
| +      codePoint &= 0x0F;
|
| +    } else if ((codePoint & 0xf8) == 0xf0) {
|
| +      // 11110xxx
|
| +      trailing = 3;
|
| +      lowerBound = 0x10000;
|
| +      codePoint &= 0x07;
|
| +    } else if ((codePoint & 0xfc) == 0xf8) {
|
| +      // 111110xx
|
| +      trailing = 4;
|
| +      lowerBound = 0x200000;
|
| +      codePoint &= 0x03;
|
| +    } else if ((codePoint & 0xfe) == 0xfc) {
|
| +      // 1111110x
|
| +      trailing = 5;
|
| +      lowerBound = 0x4000000;
|
| +      codePoint &= 0x01;
|
| +    } else {
|
| +      // Not a valid lead byte.
|
| +      return -1;
|
| +    }
|
| +
|
| +    int index = 0;
|
| +    while (index < trailing) {
|
| +      int continuation = getNext();
|
| +      // Not enough input yet; the caller resets and retries later.
|
| +      if (continuation == null) return 0;
|
| +      bool isByte = (continuation & 0xff) == continuation;
|
| +      bool hasMarker = (continuation & 0xc0) == 0x80; // 10xxxxxx
|
| +      if (!hasMarker || !isByte) return -1;
|
| +      codePoint = (codePoint << 6) | (continuation & 0x3f);
|
| +      // After the first trailing byte of a sequence with three or more
|
| +      // trailing bytes, report a value that already exceeds 0x10FFFF.
|
| +      if (index == 0 && trailing >= 3 && (codePoint << 12) > 0x10FFFF) {
|
| +        _addChar(-1);
|
| +      }
|
| +      index++;
|
| +    }
|
| +
|
| +    // A value below the minimum for its length is an invalid char code.
|
| +    _addChar(codePoint < lowerBound ? -1 : codePoint);
|
| +    return trailing + 1;
|
| +  }
|
| +}
|
| +
|
| +
|
| +/**
|
| + * Base class for transformers that encode a stream of strings into lists of
|
| + * bytes.
|
| + *
|
| + * The instance is both the [StreamTransformer] and the [EventSink] that
|
| + * receives the source events, so it can be attached to one output sink only.
|
| + * Subclasses supply the encoding in [_processString].
|
| + */
|
| +abstract class _StringEncoder
|
| +    implements StreamTransformer<String, List<int>>, EventSink<String> {
|
| +  // Sink of the output stream; assigned by the callback that [bind] hands to
|
| +  // [Stream.eventTransformed].
|
| +  EventSink<List<int>> _outSink;
|
| +
|
| +  /**
|
| +   * Binds this encoder to [stream] and returns the stream of encoded bytes.
|
| +   *
|
| +   * The sink-mapping callback throws a [StateError] if this encoder has
|
| +   * already been attached to an output sink.
|
| +   */
|
| +  Stream<List<int>> bind(Stream<String> stream) {
|
| +    EventSink<String> attach(EventSink<List<int>> downstream) {
|
| +      if (_outSink == null) {
|
| +        _outSink = downstream;
|
| +        return this;
|
| +      }
|
| +      throw new StateError("String encoder already used");
|
| +    }
|
| +    return new Stream.eventTransformed(stream, attach);
|
| +  }
|
| +
|
| +  /** Encodes [data] and emits the resulting bytes as one event. */
|
| +  void add(String data) {
|
| +    List<int> encoded = _processString(data);
|
| +    _outSink.add(encoded);
|
| +  }
|
| +
|
| +  /** Passes [error] and [stackTrace] through to the output sink. */
|
| +  void addError(error, [StackTrace stackTrace]) {
|
| +    _outSink.addError(error, stackTrace);
|
| +  }
|
| +
|
| +  /** Closes the output sink. */
|
| +  void close() {
|
| +    _outSink.close();
|
| +  }
|
| +
|
| +  /** Returns the encoded bytes for [string]. */
|
| +  List<int> _processString(String string);
|
| +}
|
| +
|
| +/**
|
| + * StringTransformer that UTF-8 encodes a stream of strings.
|
| + */
|
| +class Utf8EncoderTransformer extends _StringEncoder {
|
| +  // Returns the UTF-8 bytes for [string]. The string's UTF-16 code units are
|
| +  // first converted to code points, and each code point is then written as a
|
| +  // lead byte followed by zero to three continuation bytes.
|
| +  List<int> _processString(String string) {
|
| +    var bytes = <int>[];
|
| +    List<int> codepoints = utf16CodeUnitsToCodepoints(string.codeUnits);
|
| +    for (int charCode in codepoints) {
|
| +      int additionalBytes;
|
| +      if (charCode <= 0x007F) {
|
| +        // 0xxxxxxx: the code point is its own single byte.
|
| +        additionalBytes = 0;
|
| +        bytes.add(charCode);
|
| +      } else if (charCode <= 0x07FF) {
|
| +        // 110xxxxx (xxxxx is top 5 bits).
|
| +        bytes.add(((charCode >> 6) & 0x1F) | 0xC0);
|
| +        additionalBytes = 1;
|
| +      } else if (charCode <= 0xFFFF) {
|
| +        // 1110xxxx (xxxx is top 4 bits)
|
| +        bytes.add(((charCode >> 12) & 0x0F) | 0xE0);
|
| +        additionalBytes = 2;
|
| +      } else {
|
| +        // 11110xxx (xxx is top 3 bits)
|
| +        bytes.add(((charCode >> 18) & 0x07) | 0xF0);
|
| +        additionalBytes = 3;
|
| +      }
|
| +      for (int remaining = additionalBytes; remaining > 0; remaining--) {
|
| +        // 10xxxxxx (xxxxxx is next 6 bits from the top).
|
| +        bytes.add(((charCode >> (6 * (remaining - 1))) & 0x3F) | 0x80);
|
| +      }
|
| +    }
|
| +    return bytes;
|
| +  }
|
| +}
|
|
|