| OLD | NEW |
| (Empty) |
| 1 part of dart.convert; | |
/// Rune 0xFFFD. The decoder in this file writes it in place of input it
/// cannot decode when malformed input is allowed.
const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD;

/// Rune 0xFEFF. The decoder in this file does not emit it when it is the
/// first character decoded.
const int UNICODE_BOM_CHARACTER_RUNE = 0xFEFF;

/// A constant [Utf8Codec] created with the constructor's default arguments.
const Utf8Codec UTF8 = const Utf8Codec();
/// An [Encoding] named "utf-8" that hands out [Utf8Encoder] and
/// [Utf8Decoder] instances.
class Utf8Codec extends Encoding {
  /// Default leniency passed to the decoders this codec creates.
  final bool _allowMalformed;

  /// Creates the codec. [allowMalformed] defaults to `false` and is stored
  /// as the default for [decode] and [decoder].
  const Utf8Codec({bool allowMalformed: false})
      : _allowMalformed = allowMalformed;

  /// The name of this encoding.
  String get name => "utf-8";

  /// Decodes [codeUnits] with a newly created [Utf8Decoder].
  ///
  /// When [allowMalformed] is omitted (`null`), the value given to the
  /// constructor is used instead.
  String decode(List<int> codeUnits, {bool allowMalformed}) {
    final bool lenient =
        (allowMalformed == null) ? _allowMalformed : allowMalformed;
    final Utf8Decoder oneShot = new Utf8Decoder(allowMalformed: lenient);
    return oneShot.convert(codeUnits);
  }

  /// Returns a new [Utf8Encoder] on every access.
  Utf8Encoder get encoder {
    return new Utf8Encoder();
  }

  /// Returns a new [Utf8Decoder] configured with this codec's default
  /// leniency.
  Utf8Decoder get decoder => new Utf8Decoder(allowMalformed: _allowMalformed);
}
/// Converts a [String] (or a range of one) into a list of UTF-8 bytes.
class Utf8Encoder extends Converter<String, List<int>> {
  const Utf8Encoder();

  /// Encodes the code units of [string] from [start] up to [end].
  ///
  /// A `null` [end] means the end of the string. The range is validated
  /// with [RangeError.checkValidRange]. An empty range yields an empty
  /// [Uint8List].
  List<int> convert(String string, [int start = 0, int end]) {
    final int sourceLength = string.length;
    RangeError.checkValidRange(start, end, sourceLength);
    if (end == null) end = sourceLength;

    final int unitCount = end - start;
    if (unitCount == 0) return new Uint8List(0);

    // The working buffer is sized at three bytes per input code unit.
    final _Utf8Encoder writer = new _Utf8Encoder.withBufferSize(unitCount * 3);
    final int consumed = writer._fillBuffer(string, start, end);
    assert(consumed >= end - 1);

    if (consumed != end) {
      // _fillBuffer leaves a lead surrogate in the final position unread;
      // encode it here on its own, with no following code unit to pair with.
      final int danglingLead = string.codeUnitAt(end - 1);
      assert(_isLeadSurrogate(danglingLead));
      final bool wasCombined = writer._writeSurrogate(danglingLead, 0);
      assert(!wasCombined);
    }
    return writer._buffer.sublist(0, writer._bufferIndex);
  }

  /// Starts a chunked conversion that writes encoded bytes to [sink].
  ///
  /// A [sink] that is not already a [ByteConversionSink] is wrapped with
  /// [ByteConversionSink.from] first.
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    sink = (sink is ByteConversionSink)
        ? sink
        : new ByteConversionSink.from(sink);
    // NOTE(review): DEVC$RT is not declared in this file; it looks like a
    // generated runtime cast helper. The call is reproduced unchanged.
    return new _Utf8EncoderSink(DEVC$RT.cast(
        sink,
        DEVC$RT.type((Sink<List<int>> _) {}),
        ByteConversionSink,
        "ImplicitCast",
        """line 125, column 33 of dart:convert/utf.dart: """,
        sink is ByteConversionSink,
        true));
  }

  /// Forwards to the inherited `bind`; redeclared with the concrete stream
  /// types of this converter.
  Stream<List<int>> bind(Stream<String> stream) {
    return super.bind(stream);
  }
}
/// Low-level UTF-8 writer that encodes string code units into a fixed-size
/// byte buffer.
class _Utf8Encoder {
  /// Not touched by this class itself; the `_Utf8EncoderSink` subclass keeps
  /// a pending lead surrogate here between slices.
  int _carry = 0;

  /// Index of the next free slot in [_buffer].
  int _bufferIndex = 0;

  /// Destination for the encoded bytes.
  final List<int> _buffer;

  static const _DEFAULT_BYTE_BUFFER_SIZE = 1024;

  /// Creates an encoder with a buffer of [_DEFAULT_BYTE_BUFFER_SIZE] bytes.
  _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE);

  /// Creates an encoder with a buffer of [bufferSize] bytes.
  _Utf8Encoder.withBufferSize(int bufferSize)
      : _buffer = _createBuffer(bufferSize);

  /// Allocates the byte buffer as a [Uint8List] of [size] elements.
  static List<int> _createBuffer(int size) {
    return new Uint8List(size);
  }

  /// Writes [leadingSurrogate] to the buffer, combined with [nextCodeUnit]
  /// when that is a tail surrogate.
  ///
  /// Returns `true` if the two units were combined into one four-byte
  /// sequence, `false` if the lead surrogate was written alone as a
  /// three-byte sequence. No capacity check is made here; callers check
  /// before calling.
  bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) {
    if (!_isTailSurrogate(nextCodeUnit)) {
      // Unpaired lead surrogate: emitted as a three-byte sequence.
      _buffer[_bufferIndex++] = 0xE0 | (leadingSurrogate >> 12);
      _buffer[_bufferIndex++] = 0x80 | ((leadingSurrogate >> 6) & 0x3f);
      _buffer[_bufferIndex++] = 0x80 | (leadingSurrogate & 0x3f);
      return false;
    }

    final int rune = _combineSurrogatePair(leadingSurrogate, nextCodeUnit);
    assert(rune > _THREE_BYTE_LIMIT);
    assert(rune <= _FOUR_BYTE_LIMIT);
    _buffer[_bufferIndex++] = 0xF0 | (rune >> 18);
    _buffer[_bufferIndex++] = 0x80 | ((rune >> 12) & 0x3f);
    _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f);
    _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f);
    return true;
  }

  /// Encodes code units of [str] from [start] towards [end] into the buffer.
  ///
  /// Stops early when the next sequence would not fit in the buffer. A lead
  /// surrogate in the final position of the range is never consumed here.
  /// Returns the index of the first code unit that was not consumed.
  int _fillBuffer(String str, int start, int end) {
    // Leave a lead surrogate in the last position for the caller, which may
    // be able to pair it with later input.
    final bool endsWithLead =
        start != end && _isLeadSurrogate(str.codeUnitAt(end - 1));
    if (endsWithLead) end--;

    int pos;
    for (pos = start; pos < end; pos++) {
      final int unit = str.codeUnitAt(pos);

      if (unit <= _ONE_BYTE_LIMIT) {
        if (_bufferIndex >= _buffer.length) break;
        _buffer[_bufferIndex++] = unit;
        continue;
      }

      if (_isLeadSurrogate(unit)) {
        // A combined pair needs four bytes of room.
        if (_bufferIndex + 3 >= _buffer.length) break;
        final int following = str.codeUnitAt(pos + 1);
        // Skip the tail surrogate too when it was consumed as part of a pair.
        if (_writeSurrogate(unit, following)) pos++;
        continue;
      }

      if (unit <= _TWO_BYTE_LIMIT) {
        if (_bufferIndex + 1 >= _buffer.length) break;
        _buffer[_bufferIndex++] = 0xC0 | (unit >> 6);
        _buffer[_bufferIndex++] = 0x80 | (unit & 0x3f);
        continue;
      }

      assert(unit <= _THREE_BYTE_LIMIT);
      if (_bufferIndex + 2 >= _buffer.length) break;
      _buffer[_bufferIndex++] = 0xE0 | (unit >> 12);
      _buffer[_bufferIndex++] = 0x80 | ((unit >> 6) & 0x3f);
      _buffer[_bufferIndex++] = 0x80 | (unit & 0x3f);
    }
    return pos;
  }
}
/// Chunked UTF-8 encoder: encodes string slices with the inherited buffer
/// and forwards the bytes to a [ByteConversionSink].
class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin {
  /// Receiver of the encoded bytes.
  final ByteConversionSink _sink;

  _Utf8EncoderSink(this._sink);

  /// Closes the underlying sink.
  ///
  /// If a lead surrogate is still pending in `_carry`, it is flushed first
  /// by adding an empty final slice; that call closes the sink when done.
  void close() {
    if (_carry == 0) {
      _sink.close();
      return;
    }
    addSlice("", 0, 0, true);
  }

  /// Encodes the code units of [str] from [start] to [end] and passes the
  /// bytes to the sink, one buffer-load at a time.
  ///
  /// A lead surrogate at the very end of a slice is held back in `_carry`
  /// so it can be paired with the first code unit of the next slice. When
  /// [isLast] is true the sink is closed after the slice has been written.
  void addSlice(String str, int start, int end, bool isLast) {
    _bufferIndex = 0;
    if (start == end && !isLast) return;

    if (_carry != 0) {
      // Try to pair the held-back lead surrogate with the first new unit;
      // 0 stands in when there is no new input.
      final int following = (start != end) ? str.codeUnitAt(start) : 0;
      assert(start != end || isLast);
      final bool paired = _writeSurrogate(_carry, following);
      assert(!paired || start != end);
      if (paired) start++;
      _carry = 0;
    }

    while (true) {
      start = _fillBuffer(str, start, end);
      // Computed before the trailing-surrogate handling below advances
      // `start`.
      final bool isLastSlice = isLast && (start == end);

      if (start == end - 1) {
        final int trailing = str.codeUnitAt(start);
        if (_isLeadSurrogate(trailing)) {
          if (isLast && _bufferIndex < _buffer.length - 3) {
            // No more input will come: write the lone surrogate now.
            final bool hasBeenCombined = _writeSurrogate(trailing, 0);
            assert(!hasBeenCombined);
          } else {
            _carry = trailing;
          }
          start++;
        }
      }

      _sink.addSlice(_buffer, 0, _bufferIndex, isLastSlice);
      _bufferIndex = 0;
      if (start >= end) break;
    }

    if (isLast) close();
  }
}
/// Converts a list of UTF-8 code units into a [String].
class Utf8Decoder extends Converter<List<int>, String> {
  /// Passed on to every low-level decoder this converter creates.
  final bool _allowMalformed;

  /// Creates the decoder; [allowMalformed] defaults to `false`.
  const Utf8Decoder({bool allowMalformed: false})
      : this._allowMalformed = allowMalformed;

  /// Decodes [codeUnits] from [start] up to [end] and returns the text.
  ///
  /// A `null` [end] means the end of the list. The range is validated with
  /// [RangeError.checkValidRange].
  String convert(List<int> codeUnits, [int start = 0, int end]) {
    final int available = codeUnits.length;
    RangeError.checkValidRange(start, end, available);
    if (end == null) end = available;

    final StringBuffer output = new StringBuffer();
    new _Utf8Decoder(output, _allowMalformed)
      ..convert(codeUnits, start, end)
      ..close();
    return output.toString();
  }

  /// Starts a chunked conversion that writes decoded text to [sink].
  ///
  /// A [sink] that is not already a [StringConversionSink] is wrapped with
  /// [StringConversionSink.from] first.
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    if (sink is StringConversionSink) {
      return sink.asUtf8Sink(_allowMalformed);
    }
    return new StringConversionSink.from(sink).asUtf8Sink(_allowMalformed);
  }

  /// Forwards to the inherited `bind`; redeclared with the concrete stream
  /// types of this converter.
  Stream<String> bind(Stream<List<int>> stream) {
    return super.bind(stream);
  }

  /// Declared `external`; the implementation is not part of this file.
  external Converter<List<int>, dynamic> fuse(Converter<String, dynamic> next);
}
// Largest values the encoder writes as one-, two- and three-byte sequences,
// and the largest value the decoder accepts at all.
const int _ONE_BYTE_LIMIT = 0x7F;
const int _TWO_BYTE_LIMIT = 0x7FF;
const int _THREE_BYTE_LIMIT = 0xFFFF;
const int _FOUR_BYTE_LIMIT = 0x10FFFF;

// Bit masks and tag values used by the surrogate helpers in this file.
const int _SURROGATE_MASK = 0xF800;
const int _SURROGATE_TAG_MASK = 0xFC00;
const int _SURROGATE_VALUE_MASK = 0x3FF;
const int _LEAD_SURROGATE_MIN = 0xD800;
const int _TAIL_SURROGATE_MIN = 0xDC00;
/// Whether [codeUnit], masked with [_SURROGATE_MASK], equals
/// [_LEAD_SURROGATE_MIN] (true for both lead and tail surrogates).
bool _isSurrogate(int codeUnit) {
  final int tag = codeUnit & _SURROGATE_MASK;
  return tag == _LEAD_SURROGATE_MIN;
}
/// Whether [codeUnit], masked with [_SURROGATE_TAG_MASK], equals
/// [_LEAD_SURROGATE_MIN].
bool _isLeadSurrogate(int codeUnit) {
  final int tag = codeUnit & _SURROGATE_TAG_MASK;
  return tag == _LEAD_SURROGATE_MIN;
}
/// Whether [codeUnit], masked with [_SURROGATE_TAG_MASK], equals
/// [_TAIL_SURROGATE_MIN].
bool _isTailSurrogate(int codeUnit) {
  final int tag = codeUnit & _SURROGATE_TAG_MASK;
  return tag == _TAIL_SURROGATE_MIN;
}
/// Combines a [lead] and [tail] surrogate into a single rune.
///
/// The low ten bits of [lead] become the high part, the low ten bits of
/// [tail] the low part, offset by 0x10000.
int _combineSurrogatePair(int lead, int tail) {
  final int highPart = (lead & _SURROGATE_VALUE_MASK) << 10;
  final int lowPart = tail & _SURROGATE_VALUE_MASK;
  return (0x10000 + highPart) | lowPart;
}
/// Incremental UTF-8 decoder that writes decoded characters to a
/// [StringSink].
///
/// State for a multi-byte sequence that is split across calls to [convert]
/// is kept in [_value], [_expectedUnits] and [_extraUnits].
class _Utf8Decoder {
  /// When true, invalid input is replaced by
  /// [UNICODE_REPLACEMENT_CHARACTER_RUNE]; when false a [FormatException] is
  /// thrown instead.
  final bool _allowMalformed;

  /// Receiver of the decoded characters.
  final StringSink _stringSink;

  /// True until the first character (or replacement) has been handled; a
  /// [UNICODE_BOM_CHARACTER_RUNE] decoded while this is true is not emitted.
  bool _isFirstCharacter = true;

  /// Bits accumulated so far for an unfinished multi-byte sequence.
  int _value = 0;

  /// Number of continuation units still missing for the unfinished sequence.
  int _expectedUnits = 0;

  /// Total number of continuation units of the unfinished sequence; used as
  /// the index into [_LIMITS] for the overlong check.
  int _extraUnits = 0;

  _Utf8Decoder(this._stringSink, this._allowMalformed);

  /// Whether a multi-byte sequence is still waiting for continuation units.
  bool get hasPartialInput => _expectedUnits > 0;

  /// `_LIMITS[n - 1]` is the largest value that does not need `n`
  /// continuation units; a decoded value at or below it is overlong.
  static const List<int> _LIMITS = const <int>[
    _ONE_BYTE_LIMIT,
    _TWO_BYTE_LIMIT,
    _THREE_BYTE_LIMIT,
    _FOUR_BYTE_LIMIT
  ];

  /// Finishes decoding by calling [flush].
  void close() {
    flush();
  }

  /// Handles an unfinished trailing sequence, if any.
  ///
  /// Throws a [FormatException] unless malformed input is allowed, in which
  /// case one replacement character is written and the pending state is
  /// cleared.
  void flush() {
    if (hasPartialInput) {
      if (!_allowMalformed) {
        throw new FormatException("Unfinished UTF-8 octet sequence");
      }
      _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
      _value = 0;
      _expectedUnits = 0;
      _extraUnits = 0;
    }
  }

  /// Decodes [codeUnits] from [startIndex] (inclusive) to [endIndex]
  /// (exclusive) and writes the result to the sink.
  ///
  /// Continues any sequence left unfinished by a previous call and stores an
  /// unfinished trailing sequence for the next call. Throws a
  /// [FormatException] on invalid input unless malformed input is allowed.
  void convert(List<int> codeUnits, int startIndex, int endIndex) {
    // Work on local copies and clear the fields, so that the fields only
    // hold state again if this call ends in the middle of a sequence.
    int value = _value;
    int expectedUnits = _expectedUnits;
    int extraUnits = _extraUnits;
    _value = 0;
    _expectedUnits = 0;
    _extraUnits = 0;

    // Counts how many consecutive units, starting at [from], fit in one
    // byte (negative units do not, because masking changes them).
    int scanOneByteCharacters(units, int from) {
      final to = endIndex;
      final mask = _ONE_BYTE_LIMIT;
      for (var i = from; i < to; i++) {
        final unit = units[i];
        if ((unit & mask) != unit) return i - from;
      }
      return to - from;
    }

    // Writes the units in [from, to) to the sink as one string.
    void addSingleBytes(int from, int to) {
      assert(from >= startIndex && from <= endIndex);
      assert(to >= startIndex && to <= endIndex);
      _stringSink.write(new String.fromCharCodes(codeUnits, from, to));
    }

    int i = startIndex;
    loop:
    while (true) {
      multibyte:
      if (expectedUnits > 0) {
        // Collect the continuation units of the current sequence.
        do {
          if (i == endIndex) {
            // Input exhausted mid-sequence; state is saved after the loop.
            break loop;
          }
          int unit = codeUnits[i];
          if ((unit & 0xC0) != 0x80) {
            // Not a continuation unit. It is not consumed here, so the
            // scanning loop below looks at it again as a fresh unit.
            expectedUnits = 0;
            if (!_allowMalformed) {
              throw new FormatException(
                  "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
            }
            _isFirstCharacter = false;
            _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
            break multibyte;
          } else {
            value = (value << 6) | (unit & 0x3f);
            expectedUnits--;
            i++;
          }
        } while (expectedUnits > 0);

        if (value <= _LIMITS[extraUnits - 1]) {
          // The value would have fit in a shorter sequence.
          if (!_allowMalformed) {
            throw new FormatException(
                "Overlong encoding of 0x${value.toRadixString(16)}");
          }
          expectedUnits = extraUnits = 0;
          value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
        }
        if (value > _FOUR_BYTE_LIMIT) {
          if (!_allowMalformed) {
            throw new FormatException("Character outside valid Unicode range: "
                "0x${value.toRadixString(16)}");
          }
          value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
        }
        // A BOM is only dropped when it is the very first character.
        if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) {
          _stringSink.writeCharCode(value);
        }
        _isFirstCharacter = false;
      }

      while (i < endIndex) {
        // Copy any run of single-byte units in one go.
        int oneBytes = scanOneByteCharacters(codeUnits, i);
        if (oneBytes > 0) {
          _isFirstCharacter = false;
          addSingleBytes(i, i + oneBytes);
          i += oneBytes;
          if (i == endIndex) break;
        }
        int unit = codeUnits[i++];
        if (unit < 0) {
          if (!_allowMalformed) {
            throw new FormatException(
                "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}");
          }
          // Fix: a replacement character has now been emitted, so a BOM that
          // follows is no longer the first character and must not be
          // dropped. The other replacement paths in this method already
          // clear the flag.
          _isFirstCharacter = false;
          _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
        } else {
          assert(unit > _ONE_BYTE_LIMIT);
          // Lead unit: record its payload bits and how many continuation
          // units must follow, then go back to the multibyte section.
          if ((unit & 0xE0) == 0xC0) {
            value = unit & 0x1F;
            expectedUnits = extraUnits = 1;
            continue loop;
          }
          if ((unit & 0xF0) == 0xE0) {
            value = unit & 0x0F;
            expectedUnits = extraUnits = 2;
            continue loop;
          }
          if ((unit & 0xF8) == 0xF0 && unit < 0xF5) {
            value = unit & 0x07;
            expectedUnits = extraUnits = 3;
            continue loop;
          }
          // Neither a single-byte unit nor an accepted lead unit.
          if (!_allowMalformed) {
            throw new FormatException(
                "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
          }
          value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
          expectedUnits = extraUnits = 0;
          _isFirstCharacter = false;
          _stringSink.writeCharCode(value);
        }
      }
      break loop;
    }

    if (expectedUnits > 0) {
      // Remember the unfinished sequence for the next call (or for flush).
      _value = value;
      _expectedUnits = expectedUnits;
      _extraUnits = extraUnits;
    }
  }
}
| OLD | NEW |