sdk/lib/utf/utf16.dart - Issue 11418115: Fix Unicode issues in dart2js and dart2dart.

Side by Side Diff: sdk/lib/utf/utf16.dart

Issue 11418115: Fix Unicode issues in dart2js and dart2dart. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5	5

6 /**	6 /**

7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert	7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert

8 * as much of the input as needed. Determines the byte order from the BOM,	8 * as much of the input as needed. Determines the byte order from the BOM,

9 * or uses big-endian as a default. This method always strips a leading BOM.	9 * or uses big-endian as a default. This method always strips a leading BOM.

10 * Set the [replacementCodepoint] to null to throw an ArgumentError	10 * Set the [replacementCodepoint] to null to throw an ArgumentError

(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always	55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always

56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an	56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an

57 * ArgumentError rather than replace the bad value. The default	57 * ArgumentError rather than replace the bad value. The default

58 * value for the [replacementCodepoint] is U+FFFD.	58 * value for the [replacementCodepoint] is U+FFFD.

59 */	59 */

60 String decodeUtf16(List<int> bytes, [int offset = 0, int length,	60 String decodeUtf16(List<int> bytes, [int offset = 0, int length,

61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes,	62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes,

63 offset, length, replacementCodepoint);	63 offset, length, replacementCodepoint);

64 List<int> codeunits = decoder.decodeRest();	64 List<int> codeunits = decoder.decodeRest();

65 // TODO is16BitCodeUnit() is used to work around a bug with dart2js	65 return new String.fromCharCodes(

66 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider	66 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

67 // removing after this issue is resolved.

68 if (_is16BitCodeUnit()) {

69 return new String.fromCharCodes(codeunits);

70 } else {

71 return new String.fromCharCodes(

72 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

73 }

74 }	67 }

75	68

76 /**	69 /**

77 * Produce a String from a sequence of UTF-16BE encoded bytes. This method	70 * Produce a String from a sequence of UTF-16BE encoded bytes. This method

78 * strips a leading BOM by default, but can be overridden by setting the	71 * strips a leading BOM by default, but can be overridden by setting the

79 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to	72 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to

80 * null to throw an ArgumentError rather than replace the bad value.	73 * null to throw an ArgumentError rather than replace the bad value.

81 * The default value for the [replacementCodepoint] is U+FFFD.	74 * The default value for the [replacementCodepoint] is U+FFFD.

82 */	75 */

83 String decodeUtf16be(List<int> bytes, [int offset = 0, int length,	76 String decodeUtf16be(List<int> bytes, [int offset = 0, int length,

84 bool stripBom = true,	77 bool stripBom = true,

85 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	78 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

86 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset,	79 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset,

87 length, stripBom, replacementCodepoint)).decodeRest();	80 length, stripBom, replacementCodepoint)).decodeRest();

88 // TODO is16BitCodeUnit() is used to work around a bug with dart2js	81 // TODO is16BitCodeUnit() is used to work around a bug with dart2js

89 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider	82 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider

90 // removing after this issue is resolved.	83 // removing after this issue is resolved.

91 if (_is16BitCodeUnit()) {	84 return new String.fromCharCodes(

92 return new String.fromCharCodes(codeunits);	85 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

93 } else {

94 return new String.fromCharCodes(

95 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

96 }

97 }	86 }

98	87

99 /**	88 /**

100 * Produce a String from a sequence of UTF-16LE encoded bytes. This method	89 * Produce a String from a sequence of UTF-16LE encoded bytes. This method

101 * strips a leading BOM by default, but can be overridden by setting the	90 * strips a leading BOM by default, but can be overridden by setting the

102 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to	91 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to

103 * null to throw an ArgumentError rather than replace the bad value.	92 * null to throw an ArgumentError rather than replace the bad value.

104 * The default value for the [replacementCodepoint] is U+FFFD.	93 * The default value for the [replacementCodepoint] is U+FFFD.

105 */	94 */

106 String decodeUtf16le(List<int> bytes, [int offset = 0, int length,	95 String decodeUtf16le(List<int> bytes, [int offset = 0, int length,

107 bool stripBom = true,	96 bool stripBom = true,

108 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	97 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

109 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset,	98 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset,

110 length, stripBom, replacementCodepoint)).decodeRest();	99 length, stripBom, replacementCodepoint)).decodeRest();

111 // TODO is16BitCodeUnit() is used to work around a bug with dart2js	100 // TODO is16BitCodeUnit() is used to work around a bug with dart2js

112 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider	101 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider

113 // removing after this issue is resolved.	102 // removing after this issue is resolved.

114 if (_is16BitCodeUnit()) {	103 return new String.fromCharCodes(

115 return new String.fromCharCodes(codeunits);	104 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

116 } else {

117 return new String.fromCharCodes(

118 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

119 }

120 }	105 }

121	106

122 /**	107 /**

123 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting	108 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting

124 * bytes with a big-endian byte-order-marker.	109 * bytes with a big-endian byte-order-marker.

125 */	110 */

126 List<int> encodeUtf16(String str) =>	111 List<int> encodeUtf16(String str) =>

127 encodeUtf16be(str, true);	112 encodeUtf16be(str, true);

128	113

129 /**	114 /**

(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
194 int end = length != null ? offset + length : utf16EncodedBytes.length;	179 int end = length != null ? offset + length : utf16EncodedBytes.length;

195 return (offset + 2) <= end &&	180 return (offset + 2) <= end &&

196 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&	181 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&

197 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI;	182 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI;

198 }	183 }

199	184

200 List<int> _stringToUtf16CodeUnits(String str) {	185 List<int> _stringToUtf16CodeUnits(String str) {

201 // TODO is16BitCodeUnit() is used to work around a bug with dart2js	186 // TODO is16BitCodeUnit() is used to work around a bug with dart2js

202 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider	187 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider

203 // removing after this issue is resolved.	188 // removing after this issue is resolved.

204 if (_is16BitCodeUnit()) {	189 return _codepointsToUtf16CodeUnits(str.charCodes);

205 return str.charCodes;

206 } else {

207 return _codepointsToUtf16CodeUnits(str.charCodes);

208 }

209 }	190 }

210	191

211 typedef _ListRangeIterator _CodeUnitsProvider();	192 typedef _ListRangeIterator _CodeUnitsProvider();

212	193

213 /**	194 /**

214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type	195 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type

215 * provides an iterator on demand and the iterator will only translate bytes	196 * provides an iterator on demand and the iterator will only translate bytes

216 * as requested by the user of the iterator. (Note: results are not cached.)	197 * as requested by the user of the iterator. (Note: results are not cached.)

217 */	198 */

218 class IterableUtf16Decoder implements Iterable<int> {	199 class IterableUtf16Decoder implements Iterable<int> {

(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
343 skip();	324 skip();

344 }	325 }

345 }	326 }

346	327

347 int decode() {	328 int decode() {

348 int lo = utf16EncodedBytesIterator.next();	329 int lo = utf16EncodedBytesIterator.next();

349 int hi = utf16EncodedBytesIterator.next();	330 int hi = utf16EncodedBytesIterator.next();

350 return (hi << 8) + lo;	331 return (hi << 8) + lo;

351 }	332 }

352 }	333 }

OLD	NEW

« sdk/lib/_internal/compiler/implementation/util/util.dart ('K') | « sdk/lib/io/string_stream.dart ('k') | sdk/lib/utf/utf_core.dart » ('j') | no next file with comments »