test/dart_codegen/expect/convert/utf.dart - Issue 1148283010: Remove dart backend

Side by Side Diff: test/dart_codegen/expect/convert/utf.dart

Issue 1148283010: Remove dart backend (Closed) Base URL: https://github.com/dart-lang/dev_compiler.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
part of dart.convert;

/// The Unicode replacement character, `U+FFFD`.
///
/// The decoder in this file writes this rune in place of malformed input
/// when malformed input is allowed.
const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD;

/// The Unicode byte-order-mark character, `U+FEFF`.
///
/// The decoder in this file drops this rune when it is the first decoded
/// character.
const int UNICODE_BOM_CHARACTER_RUNE = 0xFEFF;

/// A shared [Utf8Codec] instance created with the default arguments.
const Utf8Codec UTF8 = const Utf8Codec();

/// An [Encoding] named `"utf-8"` that pairs a [Utf8Encoder] with a
/// [Utf8Decoder].
class Utf8Codec extends Encoding {
  /// Default malformed-input policy handed to the decoders this codec creates.
  final bool _allowMalformed;

  /// Creates a codec.
  ///
  /// [allowMalformed] is the default passed on to every [Utf8Decoder] this
  /// codec creates; it defaults to `false`.
  const Utf8Codec({bool allowMalformed: false})
      : _allowMalformed = allowMalformed;

  /// The name of this encoding, `"utf-8"`.
  String get name => "utf-8";

  /// Decodes [codeUnits] to a string with a fresh [Utf8Decoder].
  ///
  /// When [allowMalformed] is omitted (or `null`) the value given to the
  /// constructor is used instead.
  String decode(List<int> codeUnits, {bool allowMalformed}) {
    if (allowMalformed == null) allowMalformed = _allowMalformed;
    return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits);
  }

  /// Returns a new [Utf8Encoder].
  Utf8Encoder get encoder => new Utf8Encoder();

  /// Returns a new [Utf8Decoder] that uses this codec's malformed-input
  /// policy.
  Utf8Decoder get decoder {
    return new Utf8Decoder(allowMalformed: _allowMalformed);
  }
}

/// Converts strings (sequences of UTF-16 code units) to UTF-8 bytes.
class Utf8Encoder extends Converter<String, List<int>> {
  const Utf8Encoder();

  /// Encodes the code units of [string] in the range [start] to [end]
  /// (exclusive) and returns the resulting bytes.
  ///
  /// [end] defaults to the length of [string]. The range is validated with
  /// [RangeError.checkValidRange]. An empty range yields an empty
  /// [Uint8List].
  List<int> convert(String string, [int start = 0, int end]) {
    int stringLength = string.length;
    RangeError.checkValidRange(start, end, stringLength);
    if (end == null) end = stringLength;
    int length = end - start;
    if (length == 0) return new Uint8List(0);
    // Three bytes per code unit is the most the encoder below ever writes:
    // a lone code unit takes at most three bytes and a surrogate pair takes
    // four bytes for two code units.
    _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(length * 3);
    int endPosition = encoder._fillBuffer(string, start, end);
    assert(endPosition >= end - 1);
    if (endPosition != end) {
      // _fillBuffer leaves a trailing lead surrogate unconsumed. Write it
      // here on its own (a next code unit of 0 is never a tail surrogate).
      int lastCodeUnit = string.codeUnitAt(end - 1);
      assert(_isLeadSurrogate(lastCodeUnit));
      bool wasCombined = encoder._writeSurrogate(lastCodeUnit, 0);
      assert(!wasCombined);
    }
    return encoder._buffer.sublist(0, encoder._bufferIndex);
  }

  /// Starts a chunked conversion that writes encoded bytes to [sink].
  ///
  /// A [sink] that is not already a [ByteConversionSink] is wrapped with
  /// [ByteConversionSink.from] first.
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    if (sink is! ByteConversionSink) {
      sink = new ByteConversionSink.from(sink);
    }
    // The DEVC$RT.cast call is the implicit downcast inserted by the code
    // generator; its arguments are kept exactly as generated.
    return new _Utf8EncoderSink(DEVC$RT.cast(
        sink,
        DEVC$RT.type((Sink<List<int>> _) {}),
        ByteConversionSink,
        "ImplicitCast",
        """line 125, column 33 of dart:convert/utf.dart: """,
        sink is ByteConversionSink,
        true));
  }

  /// Forwards to the superclass implementation with a narrowed static type.
  Stream<List<int>> bind(Stream<String> stream) => super.bind(stream);
}

/// Encoder state shared by [Utf8Encoder.convert] and the chunked sink:
/// a fixed-size byte buffer, a write position, and a carried lead surrogate.
class _Utf8Encoder {
  /// A lead surrogate held over from an earlier chunk, or 0 when none.
  int _carry = 0;

  /// Index of the next free slot in [_buffer].
  int _bufferIndex = 0;

  /// Destination for the encoded bytes.
  final List<int> _buffer;

  static const _DEFAULT_BYTE_BUFFER_SIZE = 1024;

  /// Creates an encoder with a buffer of [_DEFAULT_BYTE_BUFFER_SIZE] bytes.
  _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE);

  /// Creates an encoder with a buffer of [bufferSize] bytes.
  _Utf8Encoder.withBufferSize(int bufferSize)
      : _buffer = _createBuffer(bufferSize);

  static List<int> _createBuffer(int size) => new Uint8List(size);

  /// Writes [leadingSurrogate] to the buffer, combined with [nextCodeUnit]
  /// when that is a tail surrogate.
  ///
  /// Returns `true` when the two were combined into one four-byte sequence
  /// (so the caller must also skip [nextCodeUnit]); returns `false` when
  /// [leadingSurrogate] was written alone as a three-byte sequence.
  ///
  /// The caller is responsible for making sure the buffer has room; no
  /// bounds check is done here.
  bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) {
    if (_isTailSurrogate(nextCodeUnit)) {
      int rune = _combineSurrogatePair(leadingSurrogate, nextCodeUnit);
      assert(rune > _THREE_BYTE_LIMIT);
      assert(rune <= _FOUR_BYTE_LIMIT);
      _buffer[_bufferIndex++] = 0xF0 | (rune >> 18);
      _buffer[_bufferIndex++] = 0x80 | ((rune >> 12) & 0x3f);
      _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f);
      _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f);
      return true;
    } else {
      _buffer[_bufferIndex++] = 0xE0 | (leadingSurrogate >> 12);
      _buffer[_bufferIndex++] = 0x80 | ((leadingSurrogate >> 6) & 0x3f);
      _buffer[_bufferIndex++] = 0x80 | (leadingSurrogate & 0x3f);
      return false;
    }
  }

  /// Encodes code units of [str] from [start] up to [end] into the buffer
  /// until either the input or the buffer space runs out.
  ///
  /// A lead surrogate in the last position of the range is never consumed,
  /// because its partner may arrive later. Returns the index of the first
  /// code unit that was not consumed.
  int _fillBuffer(String str, int start, int end) {
    if (start != end && _isLeadSurrogate(str.codeUnitAt(end - 1))) {
      // Leave a trailing lead surrogate for the caller to deal with.
      end--;
    }
    int stringIndex;
    for (stringIndex = start; stringIndex < end; stringIndex++) {
      int codeUnit = str.codeUnitAt(stringIndex);
      // Every branch stops the loop, without consuming the code unit, when
      // the buffer cannot hold the complete sequence for it.
      if (codeUnit <= _ONE_BYTE_LIMIT) {
        if (_bufferIndex >= _buffer.length) break;
        _buffer[_bufferIndex++] = codeUnit;
      } else if (_isLeadSurrogate(codeUnit)) {
        if (_bufferIndex + 3 >= _buffer.length) break;
        // Reading one past stringIndex stays inside the original range:
        // a lead surrogate in the final position was excluded above.
        int nextCodeUnit = str.codeUnitAt(stringIndex + 1);
        bool wasCombined = _writeSurrogate(codeUnit, nextCodeUnit);
        if (wasCombined) stringIndex++;
      } else {
        int rune = codeUnit;
        if (rune <= _TWO_BYTE_LIMIT) {
          if (_bufferIndex + 1 >= _buffer.length) break;
          _buffer[_bufferIndex++] = 0xC0 | (rune >> 6);
          _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f);
        } else {
          assert(rune <= _THREE_BYTE_LIMIT);
          if (_bufferIndex + 2 >= _buffer.length) break;
          _buffer[_bufferIndex++] = 0xE0 | (rune >> 12);
          _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f);
          _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f);
        }
      }
    }
    return stringIndex;
  }
}

/// A string sink that UTF-8 encodes what it receives and forwards the bytes
/// to a [ByteConversionSink].
class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin {
  /// Receiver of the encoded bytes.
  final ByteConversionSink _sink;

  _Utf8EncoderSink(this._sink);

  /// Closes the underlying sink, first flushing a carried lead surrogate.
  void close() {
    if (_carry != 0) {
      // addSlice writes out the carried surrogate and, because isLast is
      // true, calls close() again once the carry has been cleared.
      addSlice("", 0, 0, true);
      return;
    }
    _sink.close();
  }

  /// Encodes the code units of [str] from [start] to [end] and passes the
  /// bytes on to the underlying sink, one buffer-full at a time.
  ///
  /// A lead surrogate at the very end of the slice is carried over to the
  /// next call, unless [isLast] is true and the buffer has room, in which
  /// case it is written on its own. When [isLast] is true the sink is
  /// closed afterwards.
  void addSlice(String str, int start, int end, bool isLast) {
    _bufferIndex = 0;
    if (start == end && !isLast) {
      return;
    }
    if (_carry != 0) {
      // Resolve the surrogate left over from the previous slice against the
      // first code unit of this one (or 0 when this slice is empty).
      int nextCodeUnit = 0;
      if (start != end) {
        nextCodeUnit = str.codeUnitAt(start);
      } else {
        assert(isLast);
      }
      bool wasCombined = _writeSurrogate(_carry, nextCodeUnit);
      assert(!wasCombined || start != end);
      if (wasCombined) start++;
      _carry = 0;
    }
    do {
      start = _fillBuffer(str, start, end);
      // Computed before the trailing-surrogate handling below advances
      // start.
      bool isLastSlice = isLast && (start == end);
      if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) {
        if (isLast && _bufferIndex < _buffer.length - 3) {
          // No more input is coming: emit the unpaired surrogate now.
          bool hasBeenCombined = _writeSurrogate(str.codeUnitAt(start), 0);
          assert(!hasBeenCombined);
        } else {
          _carry = str.codeUnitAt(start);
        }
        start++;
      }
      _sink.addSlice(_buffer, 0, _bufferIndex, isLastSlice);
      _bufferIndex = 0;
    } while (start < end);
    if (isLast) close();
  }
}

/// Converts UTF-8 bytes to a string.
class Utf8Decoder extends Converter<List<int>, String> {
  /// Whether malformed input is replaced rather than reported with a
  /// [FormatException]; passed on to the decoding machinery.
  final bool _allowMalformed;

  /// Creates a decoder; [allowMalformed] defaults to `false`.
  const Utf8Decoder({bool allowMalformed: false})
      : this._allowMalformed = allowMalformed;

  /// Decodes the bytes of [codeUnits] in the range [start] to [end]
  /// (exclusive) and returns the resulting string.
  ///
  /// [end] defaults to the length of [codeUnits]. The range is validated
  /// with [RangeError.checkValidRange].
  String convert(List<int> codeUnits, [int start = 0, int end]) {
    int length = codeUnits.length;
    RangeError.checkValidRange(start, end, length);
    if (end == null) end = length;
    StringBuffer buffer = new StringBuffer();
    _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed);
    decoder.convert(codeUnits, start, end);
    // close() reports (or replaces) a sequence that was cut off at the end.
    decoder.close();
    return buffer.toString();
  }

  /// Starts a chunked conversion that writes decoded strings to [sink].
  ///
  /// A [sink] that is not already a [StringConversionSink] is wrapped with
  /// [StringConversionSink.from] first.
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    StringConversionSink stringSink;
    if (sink is StringConversionSink) {
      stringSink = sink;
    } else {
      stringSink = new StringConversionSink.from(sink);
    }
    return stringSink.asUtf8Sink(_allowMalformed);
  }

  /// Forwards to the superclass implementation with a narrowed static type.
  Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);

  /// Implemented externally; no body is visible in this file.
  external Converter<List<int>, dynamic> fuse(Converter<String, dynamic> next);
}

// Largest code point that fits in a UTF-8 sequence of the given length.
const int _ONE_BYTE_LIMIT = 0x7f;
const int _TWO_BYTE_LIMIT = 0x7ff;
const int _THREE_BYTE_LIMIT = 0xffff;
const int _FOUR_BYTE_LIMIT = 0x10ffff;

// Bit masks and range starts used to classify UTF-16 surrogate code units.
const int _SURROGATE_MASK = 0xF800;
const int _SURROGATE_TAG_MASK = 0xFC00;
const int _SURROGATE_VALUE_MASK = 0x3FF;
const int _LEAD_SURROGATE_MIN = 0xD800;
const int _TAIL_SURROGATE_MIN = 0xDC00;

/// Whether [codeUnit] is a surrogate of either kind (0xD800..0xDFFF once
/// masked with [_SURROGATE_MASK]).
bool _isSurrogate(int codeUnit) =>
    (codeUnit & _SURROGATE_MASK) == _LEAD_SURROGATE_MIN;

/// Whether [codeUnit] is a lead (high) surrogate.
bool _isLeadSurrogate(int codeUnit) =>
    (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN;

/// Whether [codeUnit] is a tail (low) surrogate.
bool _isTailSurrogate(int codeUnit) =>
    (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN;

/// Combines a [lead] and [tail] surrogate into the code point they encode.
///
/// The parentheses spell out the grouping the expression already had
/// (`+` binds tighter than `|` in Dart), so the result is unchanged.
int _combineSurrogatePair(int lead, int tail) =>
    (0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10)) |
    (tail & _SURROGATE_VALUE_MASK);

/// Incremental UTF-8 decoder that writes decoded characters to a
/// [StringSink] and remembers an unfinished multi-byte sequence between
/// calls to [convert].
class _Utf8Decoder {
  /// When true, malformed input is replaced with
  /// [UNICODE_REPLACEMENT_CHARACTER_RUNE]; when false a [FormatException]
  /// is thrown instead.
  final bool _allowMalformed;

  /// Receiver of the decoded characters.
  final StringSink _stringSink;

  /// True until the first character has been handled; used to drop a
  /// leading [UNICODE_BOM_CHARACTER_RUNE].
  bool _isFirstCharacter = true;

  /// Bits collected so far for an unfinished multi-byte sequence.
  int _value = 0;

  /// Number of continuation bytes still missing from the unfinished
  /// sequence.
  int _expectedUnits = 0;

  /// Total number of continuation bytes the unfinished sequence started
  /// with; selects the overlong-encoding limit from [_LIMITS].
  int _extraUnits = 0;

  _Utf8Decoder(this._stringSink, this._allowMalformed);

  /// Whether a multi-byte sequence is waiting for more bytes.
  bool get hasPartialInput => _expectedUnits > 0;

  /// Indexed by `extraUnits - 1`: the largest value that a shorter sequence
  /// could already encode, used to detect overlong encodings.
  static const List<int> _LIMITS = const <int>[
    _ONE_BYTE_LIMIT,
    _TWO_BYTE_LIMIT,
    _THREE_BYTE_LIMIT,
    _FOUR_BYTE_LIMIT
  ];

  /// Finishes decoding; equivalent to [flush].
  void close() {
    flush();
  }

  /// Handles a sequence that is still unfinished.
  ///
  /// Throws a [FormatException] unless malformed input is allowed, in which
  /// case a replacement character is written and the pending state is
  /// cleared. Does nothing when there is no partial input.
  void flush() {
    if (hasPartialInput) {
      if (!_allowMalformed) {
        throw new FormatException("Unfinished UTF-8 octet sequence");
      }
      _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
      _value = 0;
      _expectedUnits = 0;
      _extraUnits = 0;
    }
  }

  /// Decodes [codeUnits] from [startIndex] up to [endIndex] (exclusive),
  /// continuing any sequence left unfinished by the previous call.
  ///
  /// Throws a [FormatException] on malformed input unless malformed input
  /// is allowed. A sequence cut off at [endIndex] is stored in the fields
  /// for the next call (or for [flush]).
  void convert(List<int> codeUnits, int startIndex, int endIndex) {
    // Work on local copies of the pending state. The fields are cleared now
    // and only restored at the end if a sequence is still unfinished.
    int value = _value;
    int expectedUnits = _expectedUnits;
    int extraUnits = _extraUnits;
    _value = 0;
    _expectedUnits = 0;
    _extraUnits = 0;

    // Returns how many consecutive units starting at `from` are in the
    // range 0..0x7f. A negative unit also stops the scan, because masking
    // changes its value.
    int scanOneByteCharacters(units, int from) {
      final to = endIndex;
      final mask = _ONE_BYTE_LIMIT;
      for (var i = from; i < to; i++) {
        final unit = units[i];
        if ((unit & mask) != unit) return i - from;
      }
      return to - from;
    }

    // Writes the units in from..to (exclusive) to the sink as one string.
    void addSingleBytes(int from, int to) {
      assert(from >= startIndex && from <= endIndex);
      assert(to >= startIndex && to <= endIndex);
      _stringSink.write(new String.fromCharCodes(codeUnits, from, to));
    }

    int i = startIndex;
    loop: while (true) {
      // Phase 1: finish the multi-byte sequence in progress, if any.
      multibyte: if (expectedUnits > 0) {
        do {
          if (i == endIndex) {
            // Input ran out mid-sequence; the state is saved after the loop.
            break loop;
          }
          int unit = codeUnits[i];
          if ((unit & 0xC0) != 0x80) {
            // Not a continuation byte. The byte is not consumed, so it is
            // examined again below as the start of a new sequence.
            expectedUnits = 0;
            if (!_allowMalformed) {
              throw new FormatException(
                  "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
            }
            _isFirstCharacter = false;
            _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
            break multibyte;
          } else {
            value = (value << 6) | (unit & 0x3f);
            expectedUnits--;
            i++;
          }
        } while (expectedUnits > 0);
        if (value <= _LIMITS[extraUnits - 1]) {
          // The value would have fit in a shorter sequence.
          if (!_allowMalformed) {
            throw new FormatException(
                "Overlong encoding of 0x${value.toRadixString(16)}");
          }
          expectedUnits = extraUnits = 0;
          value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
        }
        if (value > _FOUR_BYTE_LIMIT) {
          if (!_allowMalformed) {
            throw new FormatException("Character outside valid Unicode range: "
                "0x${value.toRadixString(16)}");
          }
          value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
        }
        // A BOM is dropped only when it is the very first character.
        if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) {
          _stringSink.writeCharCode(value);
        }
        _isFirstCharacter = false;
      }

      // Phase 2: copy runs of one-byte characters and look for the next
      // lead byte.
      while (i < endIndex) {
        int oneBytes = scanOneByteCharacters(codeUnits, i);
        if (oneBytes > 0) {
          _isFirstCharacter = false;
          addSingleBytes(i, i + oneBytes);
          i += oneBytes;
          if (i == endIndex) break;
        }
        int unit = codeUnits[i++];
        if (unit < 0) {
          if (!_allowMalformed) {
            throw new FormatException(
                "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}");
          }
          _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
        } else {
          assert(unit > _ONE_BYTE_LIMIT);
          // A lead byte fixes how many continuation bytes follow; go back
          // to phase 1 to collect them.
          if ((unit & 0xE0) == 0xC0) {
            value = unit & 0x1F;
            expectedUnits = extraUnits = 1;
            continue loop;
          }
          if ((unit & 0xF0) == 0xE0) {
            value = unit & 0x0F;
            expectedUnits = extraUnits = 2;
            continue loop;
          }
          if ((unit & 0xF8) == 0xF0 && unit < 0xF5) {
            value = unit & 0x07;
            expectedUnits = extraUnits = 3;
            continue loop;
          }
          // Neither a one-byte character nor an accepted lead byte.
          if (!_allowMalformed) {
            throw new FormatException(
                "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
          }
          value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
          expectedUnits = extraUnits = 0;
          _isFirstCharacter = false;
          _stringSink.writeCharCode(value);
        }
      }
      break loop;
    }
    if (expectedUnits > 0) {
      // Remember the unfinished sequence for the next call.
      _value = value;
      _expectedUnits = expectedUnits;
      _extraUnits = extraUnits;
    }
  }
}

OLD	NEW

« no previous file with comments | « test/dart_codegen/expect/convert/string_conversion.dart ('k') | test/dart_codegen/expect/core/annotations.dart » ('j') | no next file with comments »