Index: mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart |
diff --git a/mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart b/mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart |
new file mode 100644 |
index 0000000000000000000000000000000000000000..0936616e728c4d31dcb453fd0b8f9f3708cdb610 |
--- /dev/null |
+++ b/mojo/public/dart/third_party/utf/lib/src/utf/utf_stream.dart |
@@ -0,0 +1,237 @@ |
+// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+part of utf; |
+ |
+// TODO(floitsch): make this transformer reusable. |
+abstract class _StringDecoder |
+ implements StreamTransformer<List<int>, String>, EventSink<List<int>> { |
+ List<int> _carry; |
+ List<int> _buffer; |
+ int _replacementChar; |
+ |
+ EventSink<String> _outSink; |
+ |
+ _StringDecoder(int this._replacementChar); |
+ |
+ Stream<String> bind(Stream<List<int>> stream) { |
+ return new Stream.eventTransformed( |
+ stream, |
+ (EventSink<String> sink) { |
+ if (_outSink != null) { |
+ throw new StateError("String decoder already used"); |
+ } |
+ _outSink = sink; |
+ return this; |
+ }); |
+ } |
+ |
+ void add(List<int> bytes) { |
+ try { |
+ _buffer = <int>[]; |
+ List<int> carry = _carry; |
+ _carry = null; |
+ int pos = 0; |
+ int available = bytes.length; |
+ // If we have carry-over data, start from negative index, indicating carry |
+ // index. |
+ int goodChars = 0; |
+ if (carry != null) pos = -carry.length; |
+ while (pos < available) { |
+ int currentPos = pos; |
+ int getNext() { |
+ if (pos < 0) { |
+ return carry[pos++ + carry.length]; |
+ } else if (pos < available) { |
+ return bytes[pos++]; |
+ } |
+ return null; |
+ } |
+ int consumed = _processBytes(getNext); |
+ if (consumed > 0) { |
+ goodChars = _buffer.length; |
+ } else if (consumed == 0) { |
+ _buffer.length = goodChars; |
+ if (currentPos < 0) { |
+ _carry = []; |
+ _carry.addAll(carry); |
+ _carry.addAll(bytes); |
+ } else { |
+ _carry = bytes.sublist(currentPos); |
+ } |
+ break; |
+ } else { |
+ // Invalid byte at position pos - 1 |
+ _buffer.length = goodChars; |
+ _addChar(-1); |
+ goodChars = _buffer.length; |
+ } |
+ } |
+ if (_buffer.length > 0) { |
+ // Limit to 'goodChars', if lower than actual charCodes in the buffer. |
+ _outSink.add(new String.fromCharCodes(_buffer)); |
+ } |
+ _buffer = null; |
+ } catch (e, stackTrace) { |
+ _outSink.addError(e, stackTrace); |
+ } |
+ } |
+ |
+ void addError(error, [StackTrace stackTrace]) { |
+ _outSink.addError(error, stackTrace); |
+ } |
+ |
+ void close() { |
+ if (_carry != null) { |
+ if (_replacementChar != null) { |
+ _outSink.add(new String.fromCharCodes( |
+ new List.filled(_carry.length, _replacementChar))); |
+ } else { |
+ throw new ArgumentError('Invalid codepoint'); |
+ } |
+ } |
+ _outSink.close(); |
+ } |
+ |
+ int _processBytes(int getNext()); |
+ |
+ void _addChar(int char) { |
+ void error() { |
+ if (_replacementChar != null) { |
+ char = _replacementChar; |
+ } else { |
+ throw new ArgumentError('Invalid codepoint'); |
+ } |
+ } |
+ if (char < 0) error(); |
+ if (char >= 0xD800 && char <= 0xDFFF) error(); |
+ if (char > 0x10FFFF) error(); |
+ _buffer.add(char); |
+ } |
+} |
+ |
+/** |
+ * StringTransformer that decodes a stream of UTF-8 encoded bytes. |
+ */ |
+class Utf8DecoderTransformer extends _StringDecoder { |
+ Utf8DecoderTransformer( |
+ [int replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) |
+ : super(replacementChar); |
+ |
+ int _processBytes(int getNext()) { |
+ int value = getNext(); |
+ if ((value & 0xFF) != value) return -1; // Not a byte. |
+ if ((value & 0x80) == 0x80) { |
+ int additionalBytes; |
+ int min; |
+ if ((value & 0xe0) == 0xc0) { // 110xxxxx |
+ value = value & 0x1F; |
+ additionalBytes = 1; |
+ min = 0x80; |
+ } else if ((value & 0xf0) == 0xe0) { // 1110xxxx |
+ value = value & 0x0F; |
+ additionalBytes = 2; |
+ min = 0x800; |
+ } else if ((value & 0xf8) == 0xf0) { // 11110xxx |
+ value = value & 0x07; |
+ additionalBytes = 3; |
+ min = 0x10000; |
+ } else if ((value & 0xfc) == 0xf8) { // 111110xx |
+ value = value & 0x03; |
+ additionalBytes = 4; |
+ min = 0x200000; |
+ } else if ((value & 0xfe) == 0xfc) { // 1111110x |
+ value = value & 0x01; |
+ additionalBytes = 5; |
+ min = 0x4000000; |
+ } else { |
+ return -1; |
+ } |
+ for (int i = 0; i < additionalBytes; i++) { |
+ int next = getNext(); |
+ if (next == null) return 0; // Not enough chars, reset. |
+ if ((next & 0xc0) != 0x80 || (next & 0xff) != next) return -1; |
+ value = value << 6 | (next & 0x3f); |
+ if (additionalBytes >= 3 && i == 0 && value << 12 > 0x10FFFF) { |
+ _addChar(-1); |
+ } |
+ } |
+ // Invalid charCode if less then minimum expected. |
+ if (value < min) value = -1; |
+ _addChar(value); |
+ return 1 + additionalBytes; |
+ } |
+ _addChar(value); |
+ return 1; |
+ } |
+} |
+ |
+ |
+abstract class _StringEncoder |
+ implements StreamTransformer<String, List<int>>, EventSink<String> { |
+ |
+ EventSink<List<int>> _outSink; |
+ |
+ Stream<List<int>> bind(Stream<String> stream) { |
+ return new Stream.eventTransformed( |
+ stream, |
+ (EventSink<List<int>> sink) { |
+ if (_outSink != null) { |
+ throw new StateError("String encoder already used"); |
+ } |
+ _outSink = sink; |
+ return this; |
+ }); |
+ } |
+ |
+ void add(String data) { |
+ _outSink.add(_processString(data)); |
+ } |
+ |
+ void addError(error, [StackTrace stackTrace]) { |
+ _outSink.addError(error, stackTrace); |
+ } |
+ |
+ void close() { _outSink.close(); } |
+ |
+ List<int> _processString(String string); |
+} |
+ |
+/** |
+ * StringTransformer that UTF-8 encodes a stream of strings. |
+ */ |
+class Utf8EncoderTransformer extends _StringEncoder { |
+ List<int> _processString(String string) { |
+ var bytes = <int>[]; |
+ int pos = 0; |
+ List<int> codepoints = utf16CodeUnitsToCodepoints(string.codeUnits); |
+ int length = codepoints.length; |
+ for (int i = 0; i < length; i++) { |
+ int additionalBytes; |
+ int charCode = codepoints[i]; |
+ if (charCode <= 0x007F) { |
+ additionalBytes = 0; |
+ bytes.add(charCode); |
+ } else if (charCode <= 0x07FF) { |
+ // 110xxxxx (xxxxx is top 5 bits). |
+ bytes.add(((charCode >> 6) & 0x1F) | 0xC0); |
+ additionalBytes = 1; |
+ } else if (charCode <= 0xFFFF) { |
+ // 1110xxxx (xxxx is top 4 bits) |
+ bytes.add(((charCode >> 12) & 0x0F)| 0xE0); |
+ additionalBytes = 2; |
+ } else { |
+ // 11110xxx (xxx is top 3 bits) |
+ bytes.add(((charCode >> 18) & 0x07) | 0xF0); |
+ additionalBytes = 3; |
+ } |
+ for (int i = additionalBytes; i > 0; i--) { |
+ // 10xxxxxx (xxxxxx is next 6 bits from the top). |
+ bytes.add(((charCode >> (6 * (i - 1))) & 0x3F) | 0x80); |
+ } |
+ pos += additionalBytes + 1; |
+ } |
+ return bytes; |
+ } |
+} |