| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 part of dart.convert; | |
| 6 | |
/**
 * The shared, default-configured instance of [Latin1Codec].
 *
 * Use this constant for the most common ISO Latin-1 encoding and decoding
 * needs instead of creating a codec of your own.
 *
 * Examples:
 *
 *     var encoded = LATIN1.encode("blåbærgrød");
 *     var decoded = LATIN1.decode([0x62, 0x6c, 0xe5, 0x62, 0xe6,
 *                                  0x72, 0x67, 0x72, 0xf8, 0x64]);
 */
const Latin1Codec LATIN1 = const Latin1Codec();

/**
 * Largest value a Latin-1 byte can have.
 *
 * Handed to the encoder/decoder superclasses and used as the upper bound
 * when checking whether a value is a valid Latin-1 byte.
 */
const int _LATIN1_MASK = 0xFF;
| 22 | |
/**
 * A [Latin1Codec] converts between strings and ISO Latin-1 (also known as
 * ISO-8859-1) bytes: strings are encoded to Latin-1 bytes and Latin-1 bytes
 * are decoded to strings.
 */
class Latin1Codec extends Encoding {
  /** Default leniency used by [decode] and [decoder]. */
  final bool _allowInvalid;

  /**
   * Creates a new [Latin1Codec].
   *
   * When [allowInvalid] is true, the [decode] method and the converter
   * returned by [decoder] allow invalid values by default. Each invalid
   * value is decoded into the Unicode Replacement character (U+FFFD).
   * An individual call to [decode] can override this default.
   *
   * Encoders never accept invalid (non Latin-1) characters.
   */
  const Latin1Codec({bool allowInvalid: false}) : _allowInvalid = allowInvalid;

  /** The name of this encoding, `"iso-8859-1"`. */
  String get name {
    return "iso-8859-1";
  }

  /**
   * Decodes the Latin-1 [bytes] (a list of unsigned 8-bit integers) into the
   * corresponding string.
   *
   * If [bytes] contains values outside the range 0 .. 255, the decoder
   * will eventually throw a [FormatException].
   *
   * When [allowInvalid] is omitted, the value this [Latin1Codec] was created
   * with is used instead.
   */
  String decode(List<int> bytes, { bool allowInvalid }) {
    // Fall back to the codec-wide setting when the caller made no choice.
    final bool lenient = (allowInvalid == null) ? _allowInvalid : allowInvalid;
    final Latin1Decoder converter = lenient
        ? const Latin1Decoder(allowInvalid: true)
        : const Latin1Decoder(allowInvalid: false);
    return converter.convert(bytes);
  }

  /** The encoder of this codec; it rejects non-Latin-1 characters. */
  Latin1Encoder get encoder {
    return const Latin1Encoder();
  }

  /** The decoder of this codec, configured with the codec's leniency. */
  Latin1Decoder get decoder {
    if (_allowInvalid) {
      return const Latin1Decoder(allowInvalid: true);
    }
    return const Latin1Decoder(allowInvalid: false);
  }
}
| 68 | |
/**
 * Converter that turns strings consisting solely of ISO Latin-1 characters
 * into bytes.
 */
class Latin1Encoder extends _UnicodeSubsetEncoder {
  /** Creates an encoder restricted to the Latin-1 value range. */
  const Latin1Encoder() : super(_LATIN1_MASK);
}
| 75 | |
/**
 * Converter that turns Latin-1 bytes (lists of unsigned 8-bit integers)
 * into a string.
 */
class Latin1Decoder extends _UnicodeSubsetDecoder {
  /**
   * Creates a new [Latin1Decoder].
   *
   * The optional [allowInvalid] argument controls how [convert] treats
   * invalid bytes.
   *
   * When it is `true`, [convert] substitutes the Unicode Replacement
   * character `U+FFFD` (�) for every invalid byte.
   * Otherwise it throws a [FormatException].
   */
  const Latin1Decoder({ bool allowInvalid: false })
      : super(allowInvalid, _LATIN1_MASK);

  /**
   * Begins a chunked conversion that writes its output to [sink].
   *
   * Conversion is more efficient when the given [sink] already is a
   * [StringConversionSink]; any other sink is wrapped in one first.
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    final StringConversionSink target = (sink is StringConversionSink)
        ? sink
        : new StringConversionSink.from(sink);
    // TODO(lrn): Use stringSink.asUtf16Sink() if it becomes available.
    return _allowInvalid
        ? new _Latin1AllowInvalidDecoderSink(target)
        : new _Latin1DecoderSink(target);
  }
}
| 112 | |
/**
 * Chunked-conversion sink that decodes Latin-1 bytes and forwards the
 * resulting strings to a [StringConversionSink].
 *
 * Values outside the range 0 .. 255 are rejected with a [FormatException].
 */
class _Latin1DecoderSink extends ByteConversionSinkBase {
  StringConversionSink _sink;
  _Latin1DecoderSink(this._sink);

  /** Closes the underlying sink and drops the reference to it. */
  void close() {
    _sink.close();
    _sink = null;
  }

  /** Decodes all of [source] without closing the sink. */
  void add(List<int> source) {
    addSlice(source, 0, source.length, false);
  }

  /**
   * Forwards the values of [source] from [start] to [end] to the underlying
   * sink as a string, without validating them, and closes the sink when
   * [isLast] is true.
   */
  void _addSliceToSink(List<int> source, int start, int end, bool isLast) {
    // If _sink was a UTF-16 conversion sink, just add the slice directly with
    // _sink.addSlice(source, start, end, isLast).
    // The code below is a moderately stupid workaround until a real
    // solution can be made.
    _sink.add(new String.fromCharCodes(source, start, end));
    if (isLast) close();
  }

  /**
   * Decodes the values of [source] from [start] to [end] and closes the sink
   * when [isLast] is true.
   *
   * Throws a [FormatException] if the slice contains a value outside the
   * range 0 .. 255.
   */
  void addSlice(List<int> source, int start, int end, bool isLast) {
    end = RangeError.checkValidRange(start, end, source.length);
    if (start == end) {
      // Nothing to decode, but a final slice must still close the sink even
      // when it is empty.
      if (isLast) close();
      return;
    }
    if (source is! Uint8List) {
      // List may contain values outside of the 0..255 range. If so, throw.
      // Technically, we could excuse Uint8ClampedList as well, but it is
      // unlikely to be relevant.
      _checkValidLatin1(source, start, end);
    }
    _addSliceToSink(source, start, end, isLast);
  }

  /**
   * Throws a [FormatException] if any value of [source] from [start] to [end]
   * lies outside the range 0 .. 255.
   */
  static void _checkValidLatin1(List<int> source, int start, int end) {
    // OR all values together: the result stays within 0 .. 255 only if every
    // single value does (a negative value makes the result negative).
    int mask = 0;
    for (int i = start; i < end; i++) {
      mask |= source[i];
    }
    if (mask >= 0 && mask <= _LATIN1_MASK) {
      return;
    }
    _reportInvalidLatin1(source, start, end);  // Always throws.
  }

  /**
   * Throws a [FormatException] pointing at the first value of [source] from
   * [start] to [end] that is not a valid Latin-1 byte.
   */
  static void _reportInvalidLatin1(List<int> source, int start, int end) {
    // Find the index of the first non-Latin-1 character code.
    for (int i = start; i < end; i++) {
      int char = source[i];
      if (char < 0 || char > _LATIN1_MASK) {
        throw new FormatException("Source contains non-Latin-1 characters.",
                                  source, i);
      }
    }
    // Unreachable - we only call the function if the loop above throws.
    assert(false);
  }
}
| 172 | |
/**
 * Variant of [_Latin1DecoderSink] that does not throw on invalid input:
 * every value outside the range 0 .. 255 is replaced by the Unicode
 * Replacement character (U+FFFD).
 */
class _Latin1AllowInvalidDecoderSink extends _Latin1DecoderSink {
  _Latin1AllowInvalidDecoderSink(StringConversionSink sink) : super(sink);

  /**
   * Decodes the values of [source] from [start] to [end], substituting
   * U+FFFD for each invalid value, and closes the sink when [isLast] is true.
   */
  void addSlice(List<int> source, int start, int end, bool isLast) {
    end = RangeError.checkValidRange(start, end, source.length);
    for (int i = start; i < end; i++) {
      int char = source[i];
      if (char > _LATIN1_MASK || char < 0) {
        // Flush the valid run that precedes the invalid value.
        if (i > start) _addSliceToSink(source, start, i, false);
        // Emit the replacement character U+FFFD (as a character code).
        _addSliceToSink(const [0xFFFD], 0, 1, false);
        start = i + 1;
      }
    }
    if (start < end) {
      // Never forward isLast here: the sink is closed exactly once below.
      // Passing it on would close the sink inside _addSliceToSink and then a
      // second time afterwards, on a sink reference that is already null.
      _addSliceToSink(source, start, end, false);
    }
    if (isLast) {
      close();
    }
  }
}
| OLD | NEW |