Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(453)

Side by Side Diff: pkg/dev_compiler/tool/input_sdk/lib/convert/ascii.dart

Issue 2698353003: unfork DDC's copy of most SDK libraries (Closed)
Patch Set: revert core_patch Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 part of dart.convert;
6
/**
 * An instance of the default implementation of the [AsciiCodec].
 *
 * This instance provides convenient access to the most common ASCII
 * use cases.
 *
 * Examples:
 *
 *     var encoded = ASCII.encode("This is ASCII!");
 *     var decoded = ASCII.decode([0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
 *                                 0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x21]);
 */
const AsciiCodec ASCII = const AsciiCodec();

// Bit mask with the seven low bits set. The encoders and decoders in this
// file reject (or replace) any value that has a bit set outside this mask.
const int _ASCII_MASK = 0x7F;
22
/**
 * An [AsciiCodec] allows encoding strings as ASCII bytes
 * and decoding ASCII bytes to strings.
 */
class AsciiCodec extends Encoding {
  final bool _allowInvalid;

  /**
   * Instantiates a new [AsciiCodec].
   *
   * If [allowInvalid] is true, the [decode] method and the converter
   * returned by [decoder] will default to allowing invalid values.
   * If allowing invalid values, the values will be decoded into the Unicode
   * Replacement character (U+FFFD). If not, an exception will be thrown.
   * Calls to the [decode] method can choose to override this default.
   *
   * Encoders will not accept invalid (non-ASCII) characters.
   */
  const AsciiCodec({bool allowInvalid: false}) : _allowInvalid = allowInvalid;

  String get name => "us-ascii";

  /**
   * Decodes the ASCII [bytes] (a list of unsigned 7-bit integers) to the
   * corresponding string.
   *
   * If [bytes] contains values that are not in the range 0 .. 127, the
   * outcome depends on [allowInvalid]: when it is true each such value is
   * decoded as the Unicode Replacement character (U+FFFD), otherwise a
   * [FormatException] is thrown.
   *
   * If [allowInvalid] is not provided, it defaults to the value used to create
   * this [AsciiCodec].
   */
  String decode(List<int> bytes, {bool allowInvalid}) {
    if (allowInvalid == null) allowInvalid = _allowInvalid;
    // Both alternatives are compile-time constants, so no decoder is
    // allocated per call.
    final AsciiDecoder chosen = allowInvalid
        ? const AsciiDecoder(allowInvalid: true)
        : const AsciiDecoder(allowInvalid: false);
    return chosen.convert(bytes);
  }

  AsciiEncoder get encoder => const AsciiEncoder();

  /**
   * Returns a decoder whose handling of invalid bytes follows the
   * `allowInvalid` value this codec was created with.
   */
  AsciiDecoder get decoder => _allowInvalid
      ? const AsciiDecoder(allowInvalid: true)
      : const AsciiDecoder(allowInvalid: false);
}
69
// Shared base class of [AsciiEncoder] and [Latin1Encoder].
// The subclasses differ only in the bit mask of the code units they accept.
class _UnicodeSubsetEncoder extends Converter<String, List<int>> {
  final int _subsetMask;

  const _UnicodeSubsetEncoder(this._subsetMask);

  /**
   * Encodes [string] as a list of bytes, one byte per code unit.
   *
   * When [start] and [end] are given, only
   * `string.substring(start, end)` is encoded.
   *
   * Throws an [ArgumentError] if any code unit in the range has a bit set
   * outside the subset mask.
   */
  List<int> convert(String string, [int start = 0, int end]) {
    final int stringLength = string.length;
    RangeError.checkValidRange(start, end, stringLength);
    end ??= stringLength;
    final List<int> bytes = new Uint8List(end - start);
    int out = 0;
    for (int index = start; index < end; index++) {
      final int codeUnit = string.codeUnitAt(index);
      if ((codeUnit & ~_subsetMask) != 0) {
        throw new ArgumentError("String contains invalid characters.");
      }
      bytes[out++] = codeUnit;
    }
    return bytes;
  }

  /**
   * Starts a chunked conversion.
   *
   * A [sink] that is not already a [ByteConversionSink] is wrapped in one,
   * so passing a [ByteConversionSink] directly avoids the extra layer.
   */
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    final ByteConversionSink byteSink = sink is ByteConversionSink
        ? sink
        : new ByteConversionSink.from(sink);
    return new _UnicodeSubsetEncoderSink(_subsetMask, byteSink);
  }

  // Overridden only to give the result a more precise static type than the
  // base class declares.
  Stream<List<int>> bind(Stream<String> stream) {
    return super.bind(stream);
  }
}
115
/**
 * This class converts strings of only ASCII characters to bytes.
 *
 * All conversion logic is inherited from [_UnicodeSubsetEncoder]; this class
 * only supplies [_ASCII_MASK] as the subset mask.
 */
class AsciiEncoder extends _UnicodeSubsetEncoder {
  const AsciiEncoder() : super(_ASCII_MASK);
}
122
/**
 * Chunked-conversion sink that validates incoming string slices against a
 * subset mask and forwards their code units, as bytes, to a
 * [ByteConversionSink].
 */
class _UnicodeSubsetEncoderSink extends StringConversionSinkBase {
  final ByteConversionSink _sink;
  final int _subsetMask;

  _UnicodeSubsetEncoderSink(this._subsetMask, this._sink);

  // Closing this sink closes the wrapped byte sink.
  void close() {
    _sink.close();
  }

  /**
   * Validates `source[start..end)` and forwards its code units to the
   * wrapped sink; closes this sink afterwards when [isLast] is true.
   *
   * Throws an [ArgumentError] before anything is forwarded if a code unit
   * in the range has a bit set outside the subset mask.
   */
  void addSlice(String source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    int index = start;
    while (index < end) {
      final int codeUnit = source.codeUnitAt(index);
      if ((codeUnit & ~_subsetMask) != 0) {
        throw new ArgumentError(
            "Source contains invalid character with code point: $codeUnit.");
      }
      index++;
    }
    // The whole slice is valid, so hand it over in a single chunk.
    final List<int> chunk = source.codeUnits.sublist(start, end);
    _sink.add(chunk);
    if (isLast) close();
  }
}
152
/**
 * Base class for decoders that convert bytes from a mask-defined subset of
 * Unicode (lists of unsigned 7- or 8-bit integers) to a string.
 */
abstract class _UnicodeSubsetDecoder extends Converter<List<int>, String> {
  final bool _allowInvalid;
  final int _subsetMask;

  /**
   * Instantiates a new decoder.
   *
   * The [_subsetMask] argument is the bit mask that defines which byte
   * values are valid: use [_LATIN1_MASK] for Latin-1 (8-bit) or
   * [_ASCII_MASK] for ASCII (7-bit).
   *
   * The [_allowInvalid] argument selects what [convert] does with a byte
   * outside that mask: if `true` the byte is replaced with the Unicode
   * Replacement character `U+FFFD`, otherwise a [FormatException] is thrown.
   */
  const _UnicodeSubsetDecoder(this._allowInvalid, this._subsetMask);

  /**
   * Converts the [bytes] (a list of unsigned 7- or 8-bit integers) to the
   * corresponding string.
   *
   * When [start] and [end] are given, only the bytes from `start` up to, but
   * not including, `end` are converted.
   */
  String convert(List<int> bytes, [int start = 0, int end]) {
    final int byteCount = bytes.length;
    RangeError.checkValidRange(start, end, byteCount);
    end ??= byteCount;

    for (int index = start; index < end; index++) {
      final int value = bytes[index];
      if ((value & ~_subsetMask) == 0) continue;
      // First invalid byte found: either redo the range with replacement
      // characters, or reject the input.
      if (_allowInvalid) return _convertInvalid(bytes, start, end);
      throw new FormatException("Invalid value in input: $value");
    }
    return new String.fromCharCodes(bytes, start, end);
  }

  // Builds the string for `bytes[start..end)`, writing U+FFFD in place of
  // every byte that has a bit set outside the subset mask.
  String _convertInvalid(List<int> bytes, int start, int end) {
    final StringBuffer out = new StringBuffer();
    for (int index = start; index < end; index++) {
      final int value = bytes[index];
      final bool valid = (value & ~_subsetMask) == 0;
      out.writeCharCode(valid ? value : 0xFFFD);
    }
    return out.toString();
  }

  /**
   * Starts a chunked conversion.
   *
   * The converter works more efficiently if the given [sink] is a
   * [StringConversionSink].
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink);

  // Overridden only to give the result a more precise static type than the
  // base class declares.
  Stream<String> bind(Stream<List<int>> stream) {
    return super.bind(stream);
  }
}
222
/**
 * Decoder for ASCII bytes: a [_UnicodeSubsetDecoder] configured with
 * [_ASCII_MASK].
 */
class AsciiDecoder extends _UnicodeSubsetDecoder {
  const AsciiDecoder({bool allowInvalid: false})
      : super(allowInvalid, _ASCII_MASK);

  /**
   * Starts a chunked conversion.
   *
   * A [sink] that is not already a [StringConversionSink] is wrapped in one,
   * so passing a [StringConversionSink] directly avoids the extra layer.
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    final StringConversionSink stringSink = sink is StringConversionSink
        ? sink
        : new StringConversionSink.from(sink);
    // TODO(lrn): Use asUtf16Sink when it becomes available. It
    // works just as well, is likely to have less decoding overhead,
    // and make adding U+FFFD easier.
    // At that time, merge this with _Latin1DecoderSink;
    if (!_allowInvalid) {
      return new _SimpleAsciiDecoderSink(stringSink);
    }
    return new _ErrorHandlingAsciiDecoderSink(stringSink.asUtf8Sink(false));
  }
}
251
/**
 * Chunked ASCII decoder sink that tolerates invalid bytes.
 *
 * Valid bytes are passed through to the wrapped UTF-8 sink unchanged; each
 * byte with a bit set outside [_ASCII_MASK] is replaced by the UTF-8
 * encoding of U+FFFD.
 */
class _ErrorHandlingAsciiDecoderSink extends ByteConversionSinkBase {
  ByteConversionSink _utf8Sink;
  _ErrorHandlingAsciiDecoderSink(this._utf8Sink);

  void close() {
    _utf8Sink.close();
  }

  // A whole chunk is handled as a non-final slice covering the full list.
  void add(List<int> source) {
    addSlice(source, 0, source.length, false);
  }

  /**
   * Forwards `source[start..end)` to the UTF-8 sink, substituting U+FFFD for
   * every invalid byte. When [isLast] is true the sink is closed afterwards,
   * either by the final forwarded slice or by an explicit [close].
   */
  void addSlice(List<int> source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    // Start of the current run of valid bytes that has not been forwarded.
    int pending = start;
    for (int index = start; index < end; index++) {
      if ((source[index] & ~_ASCII_MASK) == 0) continue;
      if (index > pending) {
        _utf8Sink.addSlice(source, pending, index, false);
      }
      // Add UTF-8 encoding of U+FFFD.
      _utf8Sink.add(const <int>[0xEF, 0xBF, 0xBD]);
      pending = index + 1;
    }
    if (pending < end) {
      _utf8Sink.addSlice(source, pending, end, isLast);
    } else if (isLast) {
      close();
    }
  }
}
281
/**
 * Chunked ASCII decoder sink that rejects invalid bytes.
 *
 * Each chunk is checked against [_ASCII_MASK] and, if every byte is valid,
 * forwarded to the wrapped sink as a string.
 */
class _SimpleAsciiDecoderSink extends ByteConversionSinkBase {
  Sink _sink;
  _SimpleAsciiDecoderSink(this._sink);

  void close() {
    _sink.close();
  }

  /**
   * Decodes all of [source] and adds the resulting string to the wrapped
   * sink.
   *
   * Throws a [FormatException], without forwarding anything, if any byte has
   * a bit set outside [_ASCII_MASK].
   */
  void add(List<int> source) {
    final bool hasNonAscii =
        source.any((int byte) => (byte & ~_ASCII_MASK) != 0);
    if (hasNonAscii) {
      throw new FormatException("Source contains non-ASCII bytes.");
    }
    _sink.add(new String.fromCharCodes(source));
  }

  /**
   * Decodes `source[start..end)` via [add]; an empty range adds nothing.
   * Closes this sink afterwards when [isLast] is true.
   */
  void addSlice(List<int> source, int start, int end, bool isLast) {
    final int length = source.length;
    RangeError.checkValidRange(start, end, length);
    if (start < end) {
      // Copy only when the slice is a proper sub-range of the list.
      final bool wholeList = start == 0 && end == length;
      add(wholeList ? source : source.sublist(start, end));
    }
    if (isLast) close();
  }
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698