Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: test/dart_codegen/expect/convert/utf.dart

Issue 1148283010: Remove dart backend (Closed) Base URL: https://github.com/dart-lang/dev_compiler.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
(Empty)
1 part of dart.convert;
/// Rune written in place of an undecodable sequence when malformed input is
/// allowed (U+FFFD).
const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD;

/// Byte-order-mark rune (U+FEFF); the decoder drops it when it is the first
/// character decoded.
const int UNICODE_BOM_CHARACTER_RUNE = 0xFEFF;

/// Shared codec instance built with the default settings, i.e. malformed
/// input is rejected.
const Utf8Codec UTF8 = const Utf8Codec();
/// Codec pairing [Utf8Encoder] with [Utf8Decoder].
///
/// The `allowMalformed` flag given at construction is the default leniency
/// used by [decode] and by the [decoder] getter.
class Utf8Codec extends Encoding {
  /// Default leniency applied when a caller does not specify one.
  final bool _allowMalformed;

  const Utf8Codec({bool allowMalformed: false})
      : _allowMalformed = allowMalformed;

  /// Name under which this encoding is known.
  String get name => "utf-8";

  /// Decodes [codeUnits] to a string.
  ///
  /// When [allowMalformed] is omitted (null), the value supplied to the
  /// constructor is used instead.
  String decode(List<int> codeUnits, {bool allowMalformed}) {
    final bool lenient =
        (allowMalformed == null) ? _allowMalformed : allowMalformed;
    final Utf8Decoder utf8Decoder = new Utf8Decoder(allowMalformed: lenient);
    return utf8Decoder.convert(codeUnits);
  }

  /// A fresh encoder on every access.
  Utf8Encoder get encoder {
    return new Utf8Encoder();
  }

  /// A fresh decoder carrying this codec's leniency setting.
  Utf8Decoder get decoder =>
      new Utf8Decoder(allowMalformed: _allowMalformed);
}
/// Converts strings (sequences of UTF-16 code units) to UTF-8 bytes.
class Utf8Encoder extends Converter<String, List<int>> {
  const Utf8Encoder();

  /// Encodes the code units of [string] in the range [start]..[end].
  ///
  /// [end] defaults to the string length. The range is validated with
  /// `RangeError.checkValidRange` before any work is done. An empty range
  /// yields an empty `Uint8List`.
  List<int> convert(String string, [int start = 0, int end]) {
    int stringLength = string.length;
    RangeError.checkValidRange(start, end, stringLength);
    if (end == null) end = stringLength;
    int length = end - start;
    if (length == 0) return new Uint8List(0);
    // A single code unit never produces more than three bytes, and a
    // surrogate pair (two code units) produces four, so three bytes per code
    // unit is always enough room.
    _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(length * 3);
    int endPosition = encoder._fillBuffer(string, start, end);
    assert(endPosition >= end - 1);
    if (endPosition != end) {
      // _fillBuffer stops short of a trailing lead surrogate; write it out
      // unpaired (the 0 "next unit" is not a tail surrogate).
      int lastCodeUnit = string.codeUnitAt(end - 1);
      assert(_isLeadSurrogate(lastCodeUnit));
      bool wasCombined = encoder._writeSurrogate(lastCodeUnit, 0);
      assert(!wasCombined);
    }
    return encoder._buffer.sublist(0, encoder._bufferIndex);
  }

  /// Starts a chunked conversion that writes the encoded bytes to [sink].
  ///
  /// A [sink] that is not already a [ByteConversionSink] is wrapped in one.
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    if (sink is! ByteConversionSink) {
      sink = new ByteConversionSink.from(sink);
    }
    // Generated implicit-cast check emitted by the dev_compiler runtime.
    return new _Utf8EncoderSink(DEVC$RT.cast(
        sink,
        DEVC$RT.type((Sink<List<int>> _) {}),
        ByteConversionSink,
        "ImplicitCast",
        """line 125, column 33 of dart:convert/utf.dart: """,
        sink is ByteConversionSink,
        true));
  }

  /// Forwards to the superclass implementation; overridden only to narrow
  /// the static types.
  Stream<List<int>> bind(Stream<String> stream) => super.bind(stream);
}
/// Low-level UTF-8 encoder that writes into a fixed-size byte buffer.
///
/// [_bufferIndex] is the number of bytes written so far. [_carry] is declared
/// here but only read and written by the chunked sink subclass.
class _Utf8Encoder {
  int _carry = 0;
  int _bufferIndex = 0;
  final List<int> _buffer;

  static const _DEFAULT_BYTE_BUFFER_SIZE = 1024;

  _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE);

  _Utf8Encoder.withBufferSize(int bufferSize)
      : _buffer = _createBuffer(bufferSize);

  static List<int> _createBuffer(int size) => new Uint8List(size);

  /// Writes [leadingSurrogate], combined with [nextCodeUnit] when the latter
  /// is a tail surrogate.
  ///
  /// Returns true when the two units were combined into one four-byte
  /// sequence, and false when [leadingSurrogate] was written on its own as a
  /// three-byte sequence. The caller must have ensured there is enough room
  /// in the buffer.
  bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) {
    if (_isTailSurrogate(nextCodeUnit)) {
      int rune = _combineSurrogatePair(leadingSurrogate, nextCodeUnit);
      assert(rune > _THREE_BYTE_LIMIT);
      assert(rune <= _FOUR_BYTE_LIMIT);
      _buffer[_bufferIndex++] = 0xF0 | (rune >> 18);
      _buffer[_bufferIndex++] = 0x80 | ((rune >> 12) & 0x3f);
      _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f);
      _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f);
      return true;
    } else {
      // Unpaired lead surrogate: encode the code unit itself in three bytes.
      _buffer[_bufferIndex++] = 0xE0 | (leadingSurrogate >> 12);
      _buffer[_bufferIndex++] = 0x80 | ((leadingSurrogate >> 6) & 0x3f);
      _buffer[_bufferIndex++] = 0x80 | (leadingSurrogate & 0x3f);
      return false;
    }
  }

  /// Encodes code units of [str] from [start] towards [end] into the buffer.
  ///
  /// Stops early when the next character would not fit in the buffer. A lead
  /// surrogate in the final position of the range is never consumed here; it
  /// is left for the caller. Returns the index of the first code unit that
  /// was not consumed.
  int _fillBuffer(String str, int start, int end) {
    if (start != end && _isLeadSurrogate(str.codeUnitAt(end - 1))) {
      // Leave a trailing lead surrogate to the caller, which may be able to
      // pair it with the start of the next chunk.
      end--;
    }
    int stringIndex;
    for (stringIndex = start; stringIndex < end; stringIndex++) {
      int codeUnit = str.codeUnitAt(stringIndex);
      if (codeUnit <= _ONE_BYTE_LIMIT) {
        if (_bufferIndex >= _buffer.length) break;
        _buffer[_bufferIndex++] = codeUnit;
      } else if (_isLeadSurrogate(codeUnit)) {
        // Reserve four bytes: enough for a combined pair.
        if (_bufferIndex + 3 >= _buffer.length) break;
        int nextCodeUnit = str.codeUnitAt(stringIndex + 1);
        bool wasCombined = _writeSurrogate(codeUnit, nextCodeUnit);
        // Skip the tail surrogate that was consumed together with the lead.
        if (wasCombined) stringIndex++;
      } else {
        int rune = codeUnit;
        if (rune <= _TWO_BYTE_LIMIT) {
          if (_bufferIndex + 1 >= _buffer.length) break;
          _buffer[_bufferIndex++] = 0xC0 | (rune >> 6);
          _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f);
        } else {
          assert(rune <= _THREE_BYTE_LIMIT);
          if (_bufferIndex + 2 >= _buffer.length) break;
          _buffer[_bufferIndex++] = 0xE0 | (rune >> 12);
          _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f);
          _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f);
        }
      }
    }
    return stringIndex;
  }
}
/// Chunked UTF-8 encoder: encodes string slices and forwards the resulting
/// bytes to a [ByteConversionSink].
///
/// A lead surrogate at the end of one slice is remembered in [_carry] so it
/// can be paired with a tail surrogate at the start of the next slice.
class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin {
  final ByteConversionSink _sink;

  _Utf8EncoderSink(this._sink);

  /// Closes the underlying sink, first flushing a pending lead surrogate.
  void close() {
    if (_carry != 0) {
      // Flush the carried surrogate through an empty final slice; addSlice
      // clears the carry and calls close() again once it is written.
      addSlice("", 0, 0, true);
      return;
    }
    _sink.close();
  }

  /// Encodes `str[start..end)` and forwards the bytes to the sink, one buffer
  /// at a time. When [isLast] is true the sink is closed afterwards.
  void addSlice(String str, int start, int end, bool isLast) {
    _bufferIndex = 0;
    if (start == end && !isLast) {
      return;
    }
    if (_carry != 0) {
      // Resolve the lead surrogate left over from the previous slice.
      int nextCodeUnit = 0;
      if (start != end) {
        nextCodeUnit = str.codeUnitAt(start);
      } else {
        assert(isLast);
      }
      bool wasCombined = _writeSurrogate(_carry, nextCodeUnit);
      assert(!wasCombined || start != end);
      if (wasCombined) start++;
      _carry = 0;
    }
    do {
      start = _fillBuffer(str, start, end);
      bool isLastSlice = isLast && (start == end);
      if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) {
        if (isLast && _bufferIndex < _buffer.length - 3) {
          // No further input will arrive and there is room: write the lone
          // surrogate now.
          bool hasBeenCombined = _writeSurrogate(str.codeUnitAt(start), 0);
          assert(!hasBeenCombined);
        } else {
          // Defer it; it is written by the next addSlice or by close().
          _carry = str.codeUnitAt(start);
        }
        start++;
      }
      _sink.addSlice(_buffer, 0, _bufferIndex, isLastSlice);
      _bufferIndex = 0;
    } while (start < end);
    if (isLast) close();
  }
}
/// Converts UTF-8 bytes to a string.
///
/// With `allowMalformed` false (the default), invalid input raises a
/// [FormatException]; with it true, invalid sequences are replaced by
/// U+FFFD.
class Utf8Decoder extends Converter<List<int>, String> {
  final bool _allowMalformed;

  const Utf8Decoder({bool allowMalformed: false})
      : this._allowMalformed = allowMalformed;

  /// Decodes [codeUnits] in the range [start]..[end].
  ///
  /// [end] defaults to the list length. The range is validated with
  /// `RangeError.checkValidRange` first. Input that ends in the middle of a
  /// multi-byte sequence is reported when the internal decoder is closed.
  String convert(List<int> codeUnits, [int start = 0, int end]) {
    int length = codeUnits.length;
    RangeError.checkValidRange(start, end, length);
    if (end == null) end = length;
    StringBuffer buffer = new StringBuffer();
    _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed);
    decoder.convert(codeUnits, start, end);
    decoder.close();
    return buffer.toString();
  }

  /// Starts a chunked conversion that writes decoded text to [sink].
  ///
  /// A [sink] that is not already a [StringConversionSink] is wrapped in one.
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    StringConversionSink stringSink;
    if (sink is StringConversionSink) {
      stringSink = sink;
    } else {
      stringSink = new StringConversionSink.from(sink);
    }
    return stringSink.asUtf8Sink(_allowMalformed);
  }

  /// Forwards to the superclass implementation; overridden only to narrow
  /// the static types.
  Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);

  /// Implemented externally (no body is visible in this file).
  external Converter<List<int>, dynamic> fuse(Converter<String, dynamic> next);
}
// Largest code point that fits in a UTF-8 sequence of the given length.
const int _ONE_BYTE_LIMIT = 0x7f;
const int _TWO_BYTE_LIMIT = 0x7ff;
const int _THREE_BYTE_LIMIT = 0xffff;
const int _FOUR_BYTE_LIMIT = 0x10ffff;

// Masks and tag values used to classify UTF-16 surrogate code units.
const int _SURROGATE_MASK = 0xF800;
const int _SURROGATE_TAG_MASK = 0xFC00;
const int _SURROGATE_VALUE_MASK = 0x3FF;
const int _LEAD_SURROGATE_MIN = 0xD800;
const int _TAIL_SURROGATE_MIN = 0xDC00;

/// True when [codeUnit] is any surrogate (0xD800..0xDFFF).
bool _isSurrogate(int codeUnit) =>
    (codeUnit & _SURROGATE_MASK) == _LEAD_SURROGATE_MIN;

/// True when [codeUnit] is a lead surrogate (0xD800..0xDBFF).
bool _isLeadSurrogate(int codeUnit) =>
    (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN;

/// True when [codeUnit] is a tail surrogate (0xDC00..0xDFFF).
bool _isTailSurrogate(int codeUnit) =>
    (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN;

/// Combines a [lead] / [tail] surrogate pair into the code point it encodes.
///
/// The parentheses spell out the grouping the expression already had: `+`
/// binds tighter than `|`, so the sum is formed first and the low ten bits
/// are OR-ed in afterwards.
int _combineSurrogatePair(int lead, int tail) =>
    (0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10)) |
    (tail & _SURROGATE_VALUE_MASK);
/// Incremental UTF-8 decoder that writes decoded characters to a
/// [StringSink].
///
/// State for a multi-byte sequence that is split across calls to [convert]
/// is kept in [_value], [_expectedUnits] and [_extraUnits].
class _Utf8Decoder {
  final bool _allowMalformed;
  final StringSink _stringSink;

  /// True until the first character has been handled; a BOM decoded while
  /// this is still true is dropped instead of written.
  bool _isFirstCharacter = true;

  /// Bits accumulated so far for the sequence in progress.
  int _value = 0;

  /// Continuation bytes still missing from the sequence in progress.
  int _expectedUnits = 0;

  /// Total number of continuation bytes of the sequence in progress.
  int _extraUnits = 0;

  _Utf8Decoder(this._stringSink, this._allowMalformed);

  /// Whether a multi-byte sequence is still incomplete.
  bool get hasPartialInput => _expectedUnits > 0;

  /// Largest value representable with 1, 2, 3 and 4 bytes respectively.
  static const List<int> _LIMITS = const <int>[
    _ONE_BYTE_LIMIT,
    _TWO_BYTE_LIMIT,
    _THREE_BYTE_LIMIT,
    _FOUR_BYTE_LIMIT
  ];

  /// Finishes decoding; equivalent to [flush].
  void close() {
    flush();
  }

  /// Handles an incomplete trailing sequence, if any.
  ///
  /// Throws a [FormatException] unless malformed input is allowed, in which
  /// case U+FFFD is written and the pending state is reset.
  void flush() {
    if (hasPartialInput) {
      if (!_allowMalformed) {
        throw new FormatException("Unfinished UTF-8 octet sequence");
      }
      _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
      _value = 0;
      _expectedUnits = 0;
      _extraUnits = 0;
    }
  }

  /// Decodes `codeUnits[startIndex..endIndex)` and writes the result to the
  /// sink.
  ///
  /// A sequence left unfinished at [endIndex] is stored and continued by the
  /// next call. Invalid input throws a [FormatException] unless malformed
  /// input is allowed, in which case U+FFFD is written instead.
  void convert(List<int> codeUnits, int startIndex, int endIndex) {
    // Work on local copies of the pending state; it is stored back at the end
    // only if a sequence is still unfinished.
    int value = _value;
    int expectedUnits = _expectedUnits;
    int extraUnits = _extraUnits;
    _value = 0;
    _expectedUnits = 0;
    _extraUnits = 0;

    // Counts how many consecutive units starting at [from] are plain
    // one-byte (ASCII) characters.
    int scanOneByteCharacters(units, int from) {
      final to = endIndex;
      final mask = _ONE_BYTE_LIMIT;
      for (var i = from; i < to; i++) {
        final unit = units[i];
        if ((unit & mask) != unit) return i - from;
      }
      return to - from;
    }

    // Writes a run of one-byte characters to the sink in a single call.
    void addSingleBytes(int from, int to) {
      assert(from >= startIndex && from <= endIndex);
      assert(to >= startIndex && to <= endIndex);
      _stringSink.write(new String.fromCharCodes(codeUnits, from, to));
    }

    int i = startIndex;
    loop: while (true) {
      // Phase 1: consume the continuation bytes of a sequence in progress.
      multibyte: if (expectedUnits > 0) {
        do {
          if (i == endIndex) {
            break loop;
          }
          int unit = codeUnits[i];
          if ((unit & 0xC0) != 0x80) {
            // Not a continuation byte: the sequence is broken. The offending
            // unit is not consumed; it is re-examined in phase 2.
            expectedUnits = 0;
            if (!_allowMalformed) {
              throw new FormatException(
                  "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
            }
            _isFirstCharacter = false;
            _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
            break multibyte;
          } else {
            value = (value << 6) | (unit & 0x3f);
            expectedUnits--;
            i++;
          }
        } while (expectedUnits > 0);
        // A value that would have fit in a shorter sequence is overlong.
        if (value <= _LIMITS[extraUnits - 1]) {
          if (!_allowMalformed) {
            throw new FormatException(
                "Overlong encoding of 0x${value.toRadixString(16)}");
          }
          expectedUnits = extraUnits = 0;
          value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
        }
        if (value > _FOUR_BYTE_LIMIT) {
          if (!_allowMalformed) {
            throw new FormatException("Character outside valid Unicode range: "
                "0x${value.toRadixString(16)}");
          }
          value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
        }
        // Drop a BOM only when it is the very first character.
        if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) {
          _stringSink.writeCharCode(value);
        }
        _isFirstCharacter = false;
      }

      // Phase 2: copy runs of one-byte characters and start new sequences.
      while (i < endIndex) {
        int oneBytes = scanOneByteCharacters(codeUnits, i);
        if (oneBytes > 0) {
          _isFirstCharacter = false;
          addSingleBytes(i, i + oneBytes);
          i += oneBytes;
          if (i == endIndex) break;
        }
        int unit = codeUnits[i++];
        if (unit < 0) {
          if (!_allowMalformed) {
            throw new FormatException(
                "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}");
          }
          _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
        } else {
          assert(unit > _ONE_BYTE_LIMIT);
          // Lead byte: its high bits give the number of continuation bytes.
          if ((unit & 0xE0) == 0xC0) {
            value = unit & 0x1F;
            expectedUnits = extraUnits = 1;
            continue loop;
          }
          if ((unit & 0xF0) == 0xE0) {
            value = unit & 0x0F;
            expectedUnits = extraUnits = 2;
            continue loop;
          }
          // Four-byte lead bytes are accepted only in the range 0xF0..0xF4.
          if ((unit & 0xF8) == 0xF0 && unit < 0xF5) {
            value = unit & 0x07;
            expectedUnits = extraUnits = 3;
            continue loop;
          }
          if (!_allowMalformed) {
            throw new FormatException(
                "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
          }
          value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
          expectedUnits = extraUnits = 0;
          _isFirstCharacter = false;
          _stringSink.writeCharCode(value);
        }
      }
      break loop;
    }
    // Remember an unfinished sequence for the next call.
    if (expectedUnits > 0) {
      _value = value;
      _expectedUnits = expectedUnits;
      _extraUnits = extraUnits;
    }
  }
}
OLD | NEW
« no previous file with comments | « test/dart_codegen/expect/convert/string_conversion.dart ('k') | test/dart_codegen/expect/core/annotations.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698