OLD | NEW |
| (Empty) |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 part of dart.convert; | |
6 | |
7 /** | |
8 * An instance of the default implementation of the [Latin1Codec]. | |
9 * | |
10 * This instance provides convenient access to the most common ISO Latin-1 | |
11 * use cases. | |
12 * | |
13 * Examples: | |
14 * | |
15 * var encoded = LATIN1.encode("blåbærgrød"); | |
16 * var decoded = LATIN1.decode([0x62, 0x6c, 0xe5, 0x62, 0xe6, | |
17 * 0x72, 0x67, 0x72, 0xf8, 0x64]); | |
18 */ | |
19 const Latin1Codec LATIN1 = const Latin1Codec(); | |
20 | |
// Largest value accepted as a Latin-1 code unit (0xFF). In this file it is
// passed to the encoder/decoder superclass constructors and used as the upper
// bound when the decoder sinks validate incoming values.
21 const int _LATIN1_MASK = 0xFF; | |
22 | |
/**
 * A [Latin1Codec] converts strings to ISO Latin-1 (aka ISO-8859-1) bytes
 * and converts Latin-1 bytes back to strings.
 */
class Latin1Codec extends Encoding {
  // Default leniency used by [decode] and [decoder] when no override is given.
  final bool _allowInvalid;

  /**
   * Creates a new [Latin1Codec].
   *
   * [allowInvalid] becomes the default for the [decode] method and for the
   * converter returned by [decoder]. When invalid values are allowed they
   * are decoded into the Unicode Replacement character (U+FFFD).
   * A single call to [decode] may override this default.
   *
   * Encoders will not accept invalid (non Latin-1) characters.
   */
  const Latin1Codec({bool allowInvalid: false}) : _allowInvalid = allowInvalid;

  /** The name of this encoding. */
  String get name {
    return "iso-8859-1";
  }

  /**
   * Decodes the Latin-1 [bytes] (a list of unsigned 8-bit integers) to the
   * corresponding string.
   *
   * Values in [bytes] outside the range 0 .. 255 make the decoder throw a
   * [FormatException], unless invalid values are allowed.
   *
   * When [allowInvalid] is omitted, the value this [Latin1Codec] was created
   * with is used.
   */
  String decode(List<int> bytes, {bool allowInvalid}) {
    // An explicit argument wins; otherwise fall back to the codec default.
    final bool lenient = (allowInvalid == null) ? _allowInvalid : allowInvalid;
    // Both decoders are compile-time constants, so pick one instead of
    // constructing a decoder from the runtime flag.
    final Latin1Decoder converter = lenient
        ? const Latin1Decoder(allowInvalid: true)
        : const Latin1Decoder(allowInvalid: false);
    return converter.convert(bytes);
  }

  /** The encoder for this codec. */
  Latin1Encoder get encoder {
    return const Latin1Encoder();
  }

  /** The decoder for this codec, configured with the codec's default. */
  Latin1Decoder get decoder {
    if (_allowInvalid) {
      return const Latin1Decoder(allowInvalid: true);
    }
    return const Latin1Decoder(allowInvalid: false);
  }
}
68 | |
/**
 * Converter from strings that contain only ISO Latin-1 characters to bytes.
 */
class Latin1Encoder extends _UnicodeSubsetEncoder {
  // The superclass is parameterized with the Latin-1 mask.
  const Latin1Encoder()
      : super(_LATIN1_MASK);
}
75 | |
/**
 * Converter from Latin-1 bytes (lists of unsigned 8-bit integers)
 * to a string.
 */
class Latin1Decoder extends _UnicodeSubsetDecoder {
  /**
   * Creates a new [Latin1Decoder].
   *
   * The optional [allowInvalid] argument selects how [convert] treats
   * invalid bytes: when it is `true` they are replaced with the Unicode
   * Replacement character `U+FFFD` (�); otherwise a [FormatException]
   * is thrown.
   */
  const Latin1Decoder({bool allowInvalid: false})
      : super(allowInvalid, _LATIN1_MASK);

  /**
   * Begins a chunked conversion that delivers decoded text to [sink].
   *
   * A [sink] that already is a [StringConversionSink] is used directly,
   * which is the more efficient case; any other sink is wrapped with
   * [StringConversionSink.from].
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    // TODO(lrn): Use stringSink.asUtf16Sink() if it becomes available.
    final StringConversionSink target = (sink is StringConversionSink)
        ? sink
        : new StringConversionSink.from(sink);
    // Lenient decoders substitute invalid values, strict ones throw.
    return _allowInvalid
        ? new _Latin1AllowInvalidDecoderSink(target)
        : new _Latin1DecoderSink(target);
  }
}
112 | |
/**
 * Byte sink that decodes strict Latin-1 input and forwards the resulting
 * strings to a [StringConversionSink].
 *
 * Values outside the range 0 .. 255 cause a [FormatException].
 */
class _Latin1DecoderSink extends ByteConversionSinkBase {
  // Destination for decoded text; set to null once this sink has been closed.
  StringConversionSink _sink;
  _Latin1DecoderSink(this._sink);

  /**
   * Closes the target sink.
   *
   * Calling this more than once does nothing, as the [Sink.close] contract
   * allows repeated calls.
   */
  void close() {
    if (_sink == null) return;
    _sink.close();
    _sink = null;
  }

  /** Decodes all of [source] as a non-final chunk. */
  void add(List<int> source) {
    addSlice(source, 0, source.length, false);
  }

  /**
   * Converts `source[start..end)` to a string and adds it to the target
   * sink, closing the sink afterwards when [isLast] is true.
   *
   * Performs no validation of the values in [source].
   */
  void _addSliceToSink(List<int> source, int start, int end, bool isLast) {
    // If _sink was a UTF-16 conversion sink, just add the slice directly with
    // _sink.addSlice(source, start, end, isLast).
    // The code below is a moderately stupid workaround until a real
    // solution can be made.
    _sink.add(new String.fromCharCodes(source, start, end));
    if (isLast) close();
  }

  /**
   * Decodes `source[start..end)` and forwards the result.
   *
   * Throws a [RangeError] if the range is not valid for [source], and a
   * [FormatException] if the range contains a value outside 0 .. 255.
   * When [isLast] is true the target sink is closed, even if the range is
   * empty.
   */
  void addSlice(List<int> source, int start, int end, bool isLast) {
    end = RangeError.checkValidRange(start, end, source.length);
    if (start == end) {
      // Nothing to decode, but a final chunk must still close the sink.
      if (isLast) close();
      return;
    }
    if (source is! Uint8List) {
      // List may contain values outside of the 0..255 range. If so, throw.
      // Technically, we could excuse Uint8ClampedList as well, but it is
      // unlikely to be relevant.
      _checkValidLatin1(source, start, end);
    }
    _addSliceToSink(source, start, end, isLast);
  }

  /**
   * Throws a [FormatException] if `source[start..end)` contains a value
   * outside the range 0 .. 255.
   */
  static void _checkValidLatin1(List<int> source, int start, int end) {
    // OR all values together: the result stays within 0..0xFF only if every
    // value does, and turns negative if any value is negative.
    int mask = 0;
    for (int i = start; i < end; i++) {
      mask |= source[i];
    }
    if (mask >= 0 && mask <= _LATIN1_MASK) {
      return;
    }
    _reportInvalidLatin1(source, start, end); // Always throws.
  }

  /**
   * Throws a [FormatException] pointing at the first value in
   * `source[start..end)` that is outside the range 0 .. 255.
   */
  static void _reportInvalidLatin1(List<int> source, int start, int end) {
    // Find the index of the first non-Latin-1 character code.
    for (int i = start; i < end; i++) {
      int char = source[i];
      if (char < 0 || char > _LATIN1_MASK) {
        throw new FormatException("Source contains non-Latin-1 characters.",
                                  source, i);
      }
    }
    // Unreachable - this function is only called when the range contains an
    // invalid value, so the loop above always throws.
    assert(false);
  }
}
172 | |
/**
 * Lenient variant of [_Latin1DecoderSink]: values outside the range
 * 0 .. 255 are replaced with the Unicode Replacement character (U+FFFD)
 * instead of causing a [FormatException].
 */
class _Latin1AllowInvalidDecoderSink extends _Latin1DecoderSink {
  _Latin1AllowInvalidDecoderSink(StringConversionSink sink) : super(sink);

  /**
   * Decodes `source[start..end)`, substituting U+FFFD for every invalid
   * value, and closes the sink exactly once when [isLast] is true.
   *
   * Throws a [RangeError] if the range is not valid for [source].
   */
  void addSlice(List<int> source, int start, int end, bool isLast) {
    // Use the checked end so an omitted (null) end means "to the end of
    // source", as in the superclass.
    end = RangeError.checkValidRange(start, end, source.length);
    for (int i = start; i < end; i++) {
      int char = source[i];
      if (char > _LATIN1_MASK || char < 0) {
        // Flush the valid run that precedes the invalid value.
        if (i > start) _addSliceToSink(source, start, i, false);
        // Emit the replacement character U+FFFD as a single code unit.
        _addSliceToSink(const [0xFFFD], 0, 1, false);
        start = i + 1;
      }
    }
    if (start < end) {
      // Never pass isLast here: closing is handled once below, so the sink
      // is not closed a second time after it has been released.
      _addSliceToSink(source, start, end, false);
    }
    if (isLast) {
      close();
    }
  }
}
OLD | NEW |