| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 part of dart.convert; | |
| 6 | |
/**
 * The shared default [AsciiCodec] instance.
 *
 * It gives convenient access to the most common ASCII use cases:
 * encoding a string to ASCII bytes and decoding ASCII bytes to a string.
 * Because it is created without arguments, invalid bytes are not allowed
 * by default when decoding.
 *
 * Examples:
 *
 *     var encoded = ASCII.encode("This is ASCII!");
 *     var decoded = ASCII.decode([0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
 *                                 0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x21]);
 */
const AsciiCodec ASCII = const AsciiCodec();

// Bit mask covering the seven low bits. A value with any bit set outside
// this mask is treated as non-ASCII by the converters in this file.
const int _ASCII_MASK = 0x7F;
| 22 | |
/**
 * A codec that encodes strings as ASCII bytes and decodes ASCII bytes
 * back into strings.
 */
class AsciiCodec extends Encoding {
  // Default used by [decode] and [decoder] when the caller does not say
  // whether invalid bytes are allowed.
  final bool _allowInvalid;

  /**
   * Creates a new [AsciiCodec].
   *
   * The [allowInvalid] flag sets the default behavior of the [decode]
   * method and of the converter returned by [decoder]. When invalid values
   * are allowed, each of them is decoded into the Unicode Replacement
   * character (U+FFFD). When they are not allowed, decoding them throws.
   * A call to [decode] may override this default.
   *
   * Encoders never accept invalid (non-ASCII) characters.
   */
  const AsciiCodec({bool allowInvalid: false}) : _allowInvalid = allowInvalid;

  String get name => "us-ascii";

  /**
   * Decodes the ASCII [bytes] (a list of unsigned 7-bit integers) to the
   * corresponding string.
   *
   * If [bytes] contains a value outside the range 0 .. 127, the result
   * depends on [allowInvalid]: when it is true the value is replaced by
   * U+FFFD, otherwise a [FormatException] is thrown.
   *
   * If [allowInvalid] is omitted (or null), the value given when this
   * [AsciiCodec] was created is used.
   */
  String decode(List<int> bytes, { bool allowInvalid }) {
    bool lenient = (allowInvalid == null) ? _allowInvalid : allowInvalid;
    // Both variants are compile-time constants, so no decoder is allocated.
    AsciiDecoder converter = lenient
        ? const AsciiDecoder(allowInvalid: true)
        : const AsciiDecoder(allowInvalid: false);
    return converter.convert(bytes);
  }

  AsciiEncoder get encoder => const AsciiEncoder();

  AsciiDecoder get decoder {
    if (_allowInvalid) return const AsciiDecoder(allowInvalid: true);
    return const AsciiDecoder(allowInvalid: false);
  }
}
| 69 | |
// Common base of [AsciiEncoder] and [Latin1Encoder].
// The two encoders share every operation and differ only in the bit mask
// that defines which code units are acceptable.
class _UnicodeSubsetEncoder extends Converter<String, List<int>> {
  // Code units with a bit set outside this mask are rejected.
  final int _subsetMask;

  const _UnicodeSubsetEncoder(this._subsetMask);

  /**
   * Converts the [String] into a list of its code units.
   *
   * If [start] and [end] are provided, only the substring
   * `string.substring(start, end)` is used as input to the conversion.
   *
   * Throws an [ArgumentError] if a code unit in the selected range has
   * bits set outside the subset mask.
   */
  List<int> convert(String string, [int start = 0, int end]) {
    final int inputLength = string.length;
    RangeError.checkValidRange(start, end, inputLength);
    final int stop = (end == null) ? inputLength : end;
    final List<int> bytes = new Uint8List(stop - start);
    final int invalidBits = ~_subsetMask;
    for (int src = start, dst = 0; src < stop; src++, dst++) {
      final int unit = string.codeUnitAt(src);
      if ((unit & invalidBits) != 0) {
        throw new ArgumentError("String contains invalid characters.");
      }
      bytes[dst] = unit;
    }
    return bytes;
  }

  /**
   * Starts a chunked conversion.
   *
   * The converter works more efficiently if the given [sink] is a
   * [ByteConversionSink]; any other sink is wrapped in one first.
   */
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    ByteConversionSink byteSink;
    if (sink is ByteConversionSink) {
      byteSink = sink;
    } else {
      byteSink = new ByteConversionSink.from(sink);
    }
    return new _UnicodeSubsetEncoderSink(_subsetMask, byteSink);
  }

  // Overrides the base-class bind only to expose a more precise type.
  Stream<List<int>> bind(Stream<String> stream) {
    return super.bind(stream);
  }
}
| 115 | |
/**
 * A converter that encodes strings consisting solely of ASCII characters
 * into bytes.
 *
 * It is the shared subset encoder configured with the 7-bit ASCII mask.
 */
class AsciiEncoder extends _UnicodeSubsetEncoder {
  const AsciiEncoder() : super(_ASCII_MASK);
}
| 122 | |
/**
 * A sink that encodes chunks of strings to bytes (unsigned 8-bit
 * integers) and forwards them to a wrapped [ByteConversionSink].
 */
class _UnicodeSubsetEncoderSink extends StringConversionSinkBase {
  // Receives the encoded code units.
  final ByteConversionSink _sink;
  // Code units with a bit set outside this mask are rejected.
  final int _subsetMask;

  _UnicodeSubsetEncoderSink(this._subsetMask, this._sink);

  void close() {
    _sink.close();
  }

  /**
   * Validates the code units of [source] from [start] to [end] (`end` not
   * inclusive) and passes them on to the wrapped sink as one list.
   *
   * Throws an [ArgumentError] on the first code unit outside the subset;
   * in that case nothing from this slice is forwarded. Closes the sink
   * after forwarding when [isLast] is true.
   */
  void addSlice(String source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    final int invalidBits = ~_subsetMask;
    int index = start;
    // Check the whole slice up front so a bad chunk is never partly emitted.
    while (index < end) {
      final int codeUnit = source.codeUnitAt(index);
      if ((codeUnit & invalidBits) != 0) {
        throw new ArgumentError(
            "Source contains invalid character with code point: $codeUnit.");
      }
      index++;
    }
    _sink.add(source.codeUnits.sublist(start, end));
    if (isLast) close();
  }
}
| 152 | |
/**
 * Common base for decoders that convert bytes from a subset of Unicode
 * (lists of unsigned 7- or 8-bit integers) to a string.
 */
abstract class _UnicodeSubsetDecoder extends Converter<List<int>, String> {
  final bool _allowInvalid;
  final int _subsetMask;

  /**
   * Instantiates a new decoder.
   *
   * The [_subsetMask] argument is a bit mask that defines the subset of
   * Unicode being decoded. Use [_LATIN1_MASK] for Latin-1 (8-bit) or
   * [_ASCII_MASK] for ASCII (7-bit).
   *
   * The [_allowInvalid] argument defines how [convert] deals with bytes
   * outside that subset: if it is `true`, [convert] replaces each invalid
   * byte with the Unicode Replacement character `U+FFFD` (�); otherwise
   * it throws a [FormatException].
   */
  const _UnicodeSubsetDecoder(this._allowInvalid, this._subsetMask);

  /**
   * Converts the [bytes] (a list of unsigned 7- or 8-bit integers) to the
   * corresponding string.
   *
   * If [start] and [end] are provided, only the sub-list of bytes from
   * `start` to `end` (`end` not inclusive) is used as input to the conversion.
   */
  String convert(List<int> bytes, [int start = 0, int end]) {
    final int total = bytes.length;
    RangeError.checkValidRange(start, end, total);
    final int stop = (end == null) ? total : end;
    final int invalidBits = ~_subsetMask;

    for (int index = start; index < stop; index++) {
      final int byte = bytes[index];
      if ((byte & invalidBits) == 0) continue;
      // First invalid byte found: either give up or switch to the slower
      // path that substitutes U+FFFD for every invalid byte in the range.
      if (_allowInvalid) return _convertInvalid(bytes, start, stop);
      throw new FormatException("Invalid value in input: $byte");
    }
    // Every byte is inside the subset, so the range converts directly.
    return new String.fromCharCodes(bytes, start, stop);
  }

  // Builds the string for the range [start, end), writing U+FFFD in place
  // of each byte that has bits set outside the subset mask.
  String _convertInvalid(List<int> bytes, int start, int end) {
    final StringBuffer out = new StringBuffer();
    final int invalidBits = ~_subsetMask;
    for (int index = start; index < end; index++) {
      final int value = bytes[index];
      out.writeCharCode((value & invalidBits) == 0 ? value : 0xFFFD);
    }
    return out.toString();
  }

  /**
   * Starts a chunked conversion.
   *
   * The converter works more efficiently if the given [sink] is a
   * [StringConversionSink].
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink);

  // Overrides the base-class bind only to expose a more precise type.
  Stream<String> bind(Stream<List<int>> stream) {
    return super.bind(stream);
  }
}
| 222 | |
/**
 * A converter that decodes ASCII bytes (lists of unsigned 7-bit integers)
 * to a string.
 *
 * If [allowInvalid] is true, bytes outside the ASCII range are replaced;
 * otherwise they cause an exception.
 */
class AsciiDecoder extends _UnicodeSubsetDecoder {
  const AsciiDecoder({bool allowInvalid: false})
      : super(allowInvalid, _ASCII_MASK);

  /**
   * Starts a chunked conversion.
   *
   * The converter works more efficiently if the given [sink] is a
   * [StringConversionSink]; any other sink is wrapped in one first.
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    StringConversionSink stringSink = (sink is StringConversionSink)
        ? sink
        : new StringConversionSink.from(sink);
    // TODO(lrn): Use asUtf16Sink when it becomes available. It
    // works just as well, is likely to have less decoding overhead,
    // and make adding U+FFFD easier.
    // At that time, merge this with _Latin1DecoderSink;
    if (!_allowInvalid) {
      return new _SimpleAsciiDecoderSink(stringSink);
    }
    return new _ErrorHandlingAsciiDecoderSink(stringSink.asUtf8Sink(false));
  }
}
| 251 | |
// Chunked ASCII decoder sink used when invalid bytes are allowed.
// Valid bytes are forwarded untouched to the wrapped sink and every
// non-ASCII byte is replaced by the UTF-8 encoding of U+FFFD.
class _ErrorHandlingAsciiDecoderSink extends ByteConversionSinkBase {
  ByteConversionSink _utf8Sink;
  _ErrorHandlingAsciiDecoderSink(this._utf8Sink);

  void close() {
    _utf8Sink.close();
  }

  void add(List<int> source) {
    addSlice(source, 0, source.length, false);
  }

  void addSlice(List<int> source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    // Index of the first byte that has not been forwarded yet.
    int pending = start;
    for (int index = start; index < end; index++) {
      if ((source[index] & ~_ASCII_MASK) == 0) continue;
      // Flush the run of valid bytes that precedes the invalid one.
      if (index > pending) {
        _utf8Sink.addSlice(source, pending, index, false);
      }
      // Add UTF-8 encoding of U+FFFD.
      _utf8Sink.add(const<int>[0xEF, 0xBF, 0xBD]);
      pending = index + 1;
    }
    if (pending < end) {
      // The trailing run carries the isLast flag to the wrapped sink.
      _utf8Sink.addSlice(source, pending, end, isLast);
    } else if (isLast) {
      close();
    }
  }
}
| 281 | |
// Chunked ASCII decoder sink used when invalid bytes are not allowed.
// Each chunk is validated and then added to the wrapped sink as a string.
class _SimpleAsciiDecoderSink extends ByteConversionSinkBase {
  Sink _sink;
  _SimpleAsciiDecoderSink(this._sink);

  void close() {
    _sink.close();
  }

  // Throws a [FormatException] if any byte of [source] is outside the
  // ASCII range; otherwise forwards the whole chunk as one string.
  void add(List<int> source) {
    for (int byte in source) {
      if ((byte & ~_ASCII_MASK) != 0) {
        throw new FormatException("Source contains non-ASCII bytes.");
      }
    }
    _sink.add(new String.fromCharCodes(source));
  }

  // Decodes the bytes from [start] to [end] (`end` not inclusive) through
  // [add], then closes the sink if [isLast] is true. An empty range adds
  // nothing.
  void addSlice(List<int> source, int start, int end, bool isLast) {
    final int sourceLength = source.length;
    RangeError.checkValidRange(start, end, sourceLength);
    if (start < end) {
      // Copy only when the range is a proper sub-list of the source.
      final bool coversAll = (start == 0 && end == sourceLength);
      add(coversAll ? source : source.sublist(start, end));
    }
    if (isLast) close();
  }
}
| OLD | NEW |