packages/utf/lib/src/utf16.dart - Issue 2989763002: Update charted to 0.4.8 and roll

Side by Side Diff: packages/utf/lib/src/utf16.dart

Issue 2989763002: Update charted to 0.4.8 and roll (Closed)

Patch Set: Removed Cutch from list of reviewers. Created 3 years, 4 months ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of utf;	5 library utf.utf16;

6	6

7 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).	7 import "dart:collection";

8 /**	8

9 * Provide a list of Unicode codepoints for a given string.	9 import 'constants.dart';

10 */	10 import 'list_range.dart';

11 List<int> stringToCodepoints(String str) {	11 import 'utf_16_code_unit_decoder.dart';

12 // Note: str.codeUnits gives us 16-bit code units on all Dart implementations.	12 import 'util.dart';

13 // So we need to convert.

14 return utf16CodeUnitsToCodepoints(str.codeUnits);

15 }

16	13

17 /**	14 /**

18 * Generate a string from the provided Unicode codepoints.	15 * Generate a string from the provided Unicode codepoints.

19 *	16 *

20 * Deprecated Use [String.fromCharCodes] instead.	17 * Deprecated Use [String.fromCharCodes] instead.

21 */	18 */

22 @deprecated	19 @deprecated

23 String codepointsToString(List<int> codepoints) {	20 String codepointsToString(List<int> codepoints) {

24 return new String.fromCharCodes(codepoints);	21 return new String.fromCharCodes(codepoints);

25 }	22 }

	23

26 /**	24 /**

27 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert	25 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert

28 * as much of the input as needed. Determines the byte order from the BOM,	26 * as much of the input as needed. Determines the byte order from the BOM,

29 * or uses big-endian as a default. This method always strips a leading BOM.	27 * or uses big-endian as a default. This method always strips a leading BOM.

30 * Set the [replacementCodepoint] to null to throw an ArgumentError	28 * Set the [replacementCodepoint] to null to throw an ArgumentError

31 * rather than replace the bad value. The default value for	29 * rather than replace the bad value. The default value for

32 * [replacementCodepoint] is U+FFFD.	30 * [replacementCodepoint] is U+FFFD.

33 */	31 */

34 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0,	32 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes,

35 int length, int replacementCodepoint =	33 [int offset = 0,

36 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	34 int length,

	35 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

37 return new IterableUtf16Decoder._(	36 return new IterableUtf16Decoder._(

38 () => new Utf16BytesToCodeUnitsDecoder(bytes, offset, length,	37 () => new Utf16BytesToCodeUnitsDecoder(

39 replacementCodepoint), replacementCodepoint);	38 bytes, offset, length, replacementCodepoint),

	39 replacementCodepoint);

40 }	40 }

41	41

42 /**	42 /**

43 * Decodes the UTF-16BE bytes as an iterable. Thus, the consumer can only	43 * Decodes the UTF-16BE bytes as an iterable. Thus, the consumer can only

44 * convert as much of the input as needed. This method strips a leading BOM by	44 * convert as much of the input as needed. This method strips a leading BOM by

45 * default, but can be overridden by setting the optional parameter [stripBom]	45 * default, but can be overridden by setting the optional parameter [stripBom]

46 * to false. Set the [replacementCodepoint] to null to throw an	46 * to false. Set the [replacementCodepoint] to null to throw an

47 * ArgumentError rather than replace the bad value. The default	47 * ArgumentError rather than replace the bad value. The default

48 * value for the [replacementCodepoint] is U+FFFD.	48 * value for the [replacementCodepoint] is U+FFFD.

49 */	49 */

50 IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes, [int offset = 0,	50 IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes,

51 int length, bool stripBom = true, int replacementCodepoint =	51 [int offset = 0,

52 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	52 int length,

	53 bool stripBom = true,

	54 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

53 return new IterableUtf16Decoder._(	55 return new IterableUtf16Decoder._(

54 () => new Utf16beBytesToCodeUnitsDecoder(bytes, offset, length, stripBom,	56 () => new Utf16beBytesToCodeUnitsDecoder(

55 replacementCodepoint), replacementCodepoint);	57 bytes, offset, length, stripBom, replacementCodepoint),

	58 replacementCodepoint);

56 }	59 }

57	60

58 /**	61 /**

59 * Decodes the UTF-16LE bytes as an iterable. Thus, the consumer can only	62 * Decodes the UTF-16LE bytes as an iterable. Thus, the consumer can only

60 * convert as much of the input as needed. This method strips a leading BOM by	63 * convert as much of the input as needed. This method strips a leading BOM by

61 * default, but can be overridden by setting the optional parameter [stripBom]	64 * default, but can be overridden by setting the optional parameter [stripBom]

62 * to false. Set the [replacementCodepoint] to null to throw an	65 * to false. Set the [replacementCodepoint] to null to throw an

63 * ArgumentError rather than replace the bad value. The default	66 * ArgumentError rather than replace the bad value. The default

64 * value for the [replacementCodepoint] is U+FFFD.	67 * value for the [replacementCodepoint] is U+FFFD.

65 */	68 */

66 IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes, [int offset = 0,	69 IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes,

67 int length, bool stripBom = true, int replacementCodepoint =	70 [int offset = 0,

68 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	71 int length,

	72 bool stripBom = true,

	73 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

69 return new IterableUtf16Decoder._(	74 return new IterableUtf16Decoder._(

70 () => new Utf16leBytesToCodeUnitsDecoder(bytes, offset, length, stripBom,	75 () => new Utf16leBytesToCodeUnitsDecoder(

71 replacementCodepoint), replacementCodepoint);	76 bytes, offset, length, stripBom, replacementCodepoint),

	77 replacementCodepoint);

72 }	78 }

73	79

74 /**	80 /**

75 * Produce a String from a sequence of UTF-16 encoded bytes. This method always	81 * Produce a String from a sequence of UTF-16 encoded bytes. This method always

76 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an	82 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an

77 * ArgumentError rather than replace the bad value. The default	83 * ArgumentError rather than replace the bad value. The default

78 * value for the [replacementCodepoint] is U+FFFD.	84 * value for the [replacementCodepoint] is U+FFFD.

79 */	85 */

80 String decodeUtf16(List<int> bytes, [int offset = 0, int length,	86 String decodeUtf16(List<int> bytes,

	87 [int offset = 0,

	88 int length,

81 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	89 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

82 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes,	90 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(

83 offset, length, replacementCodepoint);	91 bytes, offset, length, replacementCodepoint);

84 List<int> codeunits = decoder.decodeRest();	92 List<int> codeunits = decoder.decodeRest();

85 return new String.fromCharCodes(	93 return new String.fromCharCodes(

86 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));	94 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

87 }	95 }

88	96

89 /**	97 /**

90 * Produce a String from a sequence of UTF-16BE encoded bytes. This method	98 * Produce a String from a sequence of UTF-16BE encoded bytes. This method

91 * strips a leading BOM by default, but can be overridden by setting the	99 * strips a leading BOM by default, but can be overridden by setting the

92 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to	100 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to

93 * null to throw an ArgumentError rather than replace the bad value.	101 * null to throw an ArgumentError rather than replace the bad value.

94 * The default value for the [replacementCodepoint] is U+FFFD.	102 * The default value for the [replacementCodepoint] is U+FFFD.

95 */	103 */

96 String decodeUtf16be(List<int> bytes, [int offset = 0, int length,	104 String decodeUtf16be(List<int> bytes,

	105 [int offset = 0,

	106 int length,

97 bool stripBom = true,	107 bool stripBom = true,

98 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	108 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

99 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset,	109 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(

100 length, stripBom, replacementCodepoint)).decodeRest();	110 bytes, offset, length, stripBom, replacementCodepoint))

	111 .decodeRest();

101 return new String.fromCharCodes(	112 return new String.fromCharCodes(

102 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));	113 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

103 }	114 }

104	115

105 /**	116 /**

106 * Produce a String from a sequence of UTF-16LE encoded bytes. This method	117 * Produce a String from a sequence of UTF-16LE encoded bytes. This method

107 * strips a leading BOM by default, but can be overridden by setting the	118 * strips a leading BOM by default, but can be overridden by setting the

108 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to	119 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to

109 * null to throw an ArgumentError rather than replace the bad value.	120 * null to throw an ArgumentError rather than replace the bad value.

110 * The default value for the [replacementCodepoint] is U+FFFD.	121 * The default value for the [replacementCodepoint] is U+FFFD.

111 */	122 */

112 String decodeUtf16le(List<int> bytes, [int offset = 0, int length,	123 String decodeUtf16le(List<int> bytes,

	124 [int offset = 0,

	125 int length,

113 bool stripBom = true,	126 bool stripBom = true,

114 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	127 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

115 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset,	128 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(

116 length, stripBom, replacementCodepoint)).decodeRest();	129 bytes, offset, length, stripBom, replacementCodepoint))

	130 .decodeRest();

117 return new String.fromCharCodes(	131 return new String.fromCharCodes(

118 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));	132 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

119 }	133 }

120	134

121 /**	135 /**

122 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting	136 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting

123 * bytes with a big-endian byte-order-marker.	137 * bytes with a big-endian byte-order-marker.

124 */	138 */

125 List<int> encodeUtf16(String str) =>	139 List<int> encodeUtf16(String str) => encodeUtf16be(str, true);

126 encodeUtf16be(str, true);

127	140

128 /**	141 /**

129 * Produce a list of UTF-16BE encoded bytes. By default, this method produces	142 * Produce a list of UTF-16BE encoded bytes. By default, this method produces

130 * UTF-16BE bytes with no BOM.	143 * UTF-16BE bytes with no BOM.

131 */	144 */

132 List<int> encodeUtf16be(String str, [bool writeBOM = false]) {	145 List<int> encodeUtf16be(String str, [bool writeBOM = false]) {

133 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str);	146 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str);

134 List<int> encoding =	147 List<int> encoding =

135 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0));	148 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0));

136 int i = 0;	149 int i = 0;

(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
209 */	222 */

210 // TODO(floitsch): Consider removing the extend and switch to implements since	223 // TODO(floitsch): Consider removing the extend and switch to implements since

211 // that's cheaper to allocate.	224 // that's cheaper to allocate.

212 class IterableUtf16Decoder extends IterableBase<int> {	225 class IterableUtf16Decoder extends IterableBase<int> {

213 final _CodeUnitsProvider codeunitsProvider;	226 final _CodeUnitsProvider codeunitsProvider;

214 final int replacementCodepoint;	227 final int replacementCodepoint;

215	228

216 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);	229 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);

217	230

218 Utf16CodeUnitDecoder get iterator =>	231 Utf16CodeUnitDecoder get iterator =>

219 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(),	232 new Utf16CodeUnitDecoder.fromListRangeIterator(

220 replacementCodepoint);	233 codeunitsProvider(), replacementCodepoint);

221 }	234 }

222	235

223 /**	236 /**

224 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes	237 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes

225 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine	238 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine

226 * endian-ness, and defaults to BE.	239 * endian-ness, and defaults to BE.

227 */	240 */

228 abstract class Utf16BytesToCodeUnitsDecoder implements ListRangeIterator {	241 abstract class Utf16BytesToCodeUnitsDecoder implements ListRangeIterator {

229 // TODO(kevmoo): should this field be private?	242 // TODO(kevmoo): should this field be private?

230 final ListRangeIterator utf16EncodedBytesIterator;	243 final ListRangeIterator utf16EncodedBytesIterator;

231 final int replacementCodepoint;	244 final int replacementCodepoint;

232 int _current = null;	245 int _current = null;

233	246

234 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(	247 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(

235 this.utf16EncodedBytesIterator, this.replacementCodepoint);	248 this.utf16EncodedBytesIterator, this.replacementCodepoint);

236	249

237 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [	250 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes,

238 int offset = 0, int length,	251 [int offset = 0,

	252 int length,

239 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	253 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

240 if (length == null) {	254 if (length == null) {

241 length = utf16EncodedBytes.length - offset;	255 length = utf16EncodedBytes.length - offset;

242 }	256 }

243 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) {	257 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) {

244 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,	258 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,

245 length - 2, false, replacementCodepoint);	259 length - 2, false, replacementCodepoint);

246 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) {	260 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) {

247 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,	261 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,

248 length - 2, false, replacementCodepoint);	262 length - 2, false, replacementCodepoint);

249 } else {	263 } else {

250 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset,	264 return new Utf16beBytesToCodeUnitsDecoder(

251 length, false, replacementCodepoint);	265 utf16EncodedBytes, offset, length, false, replacementCodepoint);

252 }	266 }

253 }	267 }

254	268

255 /**	269 /**

256 * Provides a fast way to decode the rest of the source bytes in a single	270 * Provides a fast way to decode the rest of the source bytes in a single

257 * call. This method trades memory for improved speed in that it potentially	271 * call. This method trades memory for improved speed in that it potentially

258 * over-allocates the List containing results.	272 * over-allocates the List containing results.

259 */	273 */

260 List<int> decodeRest() {	274 List<int> decodeRest() {

261 List<int> codeunits = new List<int>(remaining);	275 List<int> codeunits = new List<int>(remaining);

(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
308 }	322 }

309	323

310 int decode();	324 int decode();

311 }	325 }

312	326

313 /**	327 /**

314 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes	328 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes

315 * to produce the code unit (0-(2^16)-1).	329 * to produce the code unit (0-(2^16)-1).

316 */	330 */

317 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {	331 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {

318 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [	332 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes,

319 int offset = 0, int length, bool stripBom = true,	333 [int offset = 0,

320 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	334 int length,

321 super._fromListRangeIterator(	335 bool stripBom = true,

322 (new ListRange(utf16EncodedBytes, offset, length)).iterator,	336 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])

323 replacementCodepoint) {	337 : super._fromListRangeIterator(

	338 (new ListRange(utf16EncodedBytes, offset, length)).iterator,

	339 replacementCodepoint) {

324 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {	340 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {

325 skip();	341 skip();

326 }	342 }

327 }	343 }

328	344

329 int decode() {	345 int decode() {

330 utf16EncodedBytesIterator.moveNext();	346 utf16EncodedBytesIterator.moveNext();

331 int hi = utf16EncodedBytesIterator.current;	347 int hi = utf16EncodedBytesIterator.current;

332 utf16EncodedBytesIterator.moveNext();	348 utf16EncodedBytesIterator.moveNext();

333 int lo = utf16EncodedBytesIterator.current;	349 int lo = utf16EncodedBytesIterator.current;

334 return (hi << 8) + lo;	350 return (hi << 8) + lo;

335 }	351 }

336 }	352 }

337	353

338 /**	354 /**

339 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes	355 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes

340 * to produce the code unit (0-(2^16)-1).	356 * to produce the code unit (0-(2^16)-1).

341 */	357 */

342 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {	358 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {

343 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [	359 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes,

344 int offset = 0, int length, bool stripBom = true,	360 [int offset = 0,

345 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	361 int length,

346 super._fromListRangeIterator(	362 bool stripBom = true,

347 (new ListRange(utf16EncodedBytes, offset, length)).iterator,	363 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])

348 replacementCodepoint) {	364 : super._fromListRangeIterator(

	365 (new ListRange(utf16EncodedBytes, offset, length)).iterator,

	366 replacementCodepoint) {

349 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {	367 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {

350 skip();	368 skip();

351 }	369 }

352 }	370 }

353	371

354 int decode() {	372 int decode() {

355 utf16EncodedBytesIterator.moveNext();	373 utf16EncodedBytesIterator.moveNext();

356 int lo = utf16EncodedBytesIterator.current;	374 int lo = utf16EncodedBytesIterator.current;

357 utf16EncodedBytesIterator.moveNext();	375 utf16EncodedBytesIterator.moveNext();

358 int hi = utf16EncodedBytesIterator.current;	376 int hi = utf16EncodedBytesIterator.current;

359 return (hi << 8) + lo;	377 return (hi << 8) + lo;

360 }	378 }

361 }	379 }

OLD	NEW

« no previous file with comments | « packages/utf/lib/src/utf/utf_stream.dart ('k') | packages/utf/lib/src/utf32.dart » ('j') | no next file with comments »