sdk/lib/utf/utf16.dart - Issue 11418115: Fix Unicode issues in dart2js and dart2dart.

Side by Side Diff: sdk/lib/utf/utf16.dart

Issue 11418115: Fix Unicode issues in dart2js and dart2dart. (Closed) — Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Remove accidental test expectation dupe (created 8 years, 1 month ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5	5

6 /**	6 /**

7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert	7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert

8 * as much of the input as needed. Determines the byte order from the BOM,	8 * as much of the input as needed. Determines the byte order from the BOM,

9 * or uses big-endian as a default. This method always strips a leading BOM.	9 * or uses big-endian as a default. This method always strips a leading BOM.

10 * Set the [replacementCodepoint] to null to throw an ArgumentError	10 * Set the [replacementCodepoint] to null to throw an ArgumentError

(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always	55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always

56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an	56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an

57 * ArgumentError rather than replace the bad value. The default	57 * ArgumentError rather than replace the bad value. The default

58 * value for the [replacementCodepoint] is U+FFFD.	58 * value for the [replacementCodepoint] is U+FFFD.

59 */	59 */

60 String decodeUtf16(List<int> bytes, [int offset = 0, int length,	60 String decodeUtf16(List<int> bytes, [int offset = 0, int length,

61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes,	62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes,

63 offset, length, replacementCodepoint);	63 offset, length, replacementCodepoint);

64 List<int> codeunits = decoder.decodeRest();	64 List<int> codeunits = decoder.decodeRest();

65 // TODO is16BitCodeUnit() is used to work around a bug with dart2js	65 return new String.fromCharCodes(

66 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider	66 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

67 // removing after this issue is resolved.

68 if (_is16BitCodeUnit()) {

69 return new String.fromCharCodes(codeunits);

70 } else {

71 return new String.fromCharCodes(

72 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

73 }

74 }	67 }

75	68

76 /**	69 /**

77 * Produce a String from a sequence of UTF-16BE encoded bytes. This method	70 * Produce a String from a sequence of UTF-16BE encoded bytes. This method

78 * strips a leading BOM by default, but can be overridden by setting the	71 * strips a leading BOM by default, but can be overridden by setting the

79 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to	72 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to

80 * null to throw an ArgumentError rather than replace the bad value.	73 * null to throw an ArgumentError rather than replace the bad value.

81 * The default value for the [replacementCodepoint] is U+FFFD.	74 * The default value for the [replacementCodepoint] is U+FFFD.

82 */	75 */

83 String decodeUtf16be(List<int> bytes, [int offset = 0, int length,	76 String decodeUtf16be(List<int> bytes, [int offset = 0, int length,

84 bool stripBom = true,	77 bool stripBom = true,

85 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	78 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

86 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset,	79 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset,

87 length, stripBom, replacementCodepoint)).decodeRest();	80 length, stripBom, replacementCodepoint)).decodeRest();

88 // TODO is16BitCodeUnit() is used to work around a bug with dart2js	81 return new String.fromCharCodes(

89 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider	82 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

90 // removing after this issue is resolved.

91 if (_is16BitCodeUnit()) {

92 return new String.fromCharCodes(codeunits);

93 } else {

94 return new String.fromCharCodes(

95 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

96 }

97 }	83 }

98	84

99 /**	85 /**

100 * Produce a String from a sequence of UTF-16LE encoded bytes. This method	86 * Produce a String from a sequence of UTF-16LE encoded bytes. This method

101 * strips a leading BOM by default, but can be overridden by setting the	87 * strips a leading BOM by default, but can be overridden by setting the

102 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to	88 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to

103 * null to throw an ArgumentError rather than replace the bad value.	89 * null to throw an ArgumentError rather than replace the bad value.

104 * The default value for the [replacementCodepoint] is U+FFFD.	90 * The default value for the [replacementCodepoint] is U+FFFD.

105 */	91 */

106 String decodeUtf16le(List<int> bytes, [int offset = 0, int length,	92 String decodeUtf16le(List<int> bytes, [int offset = 0, int length,

107 bool stripBom = true,	93 bool stripBom = true,

108 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	94 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

109 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset,	95 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset,

110 length, stripBom, replacementCodepoint)).decodeRest();	96 length, stripBom, replacementCodepoint)).decodeRest();

111 // TODO is16BitCodeUnit() is used to work around a bug with dart2js	97 return new String.fromCharCodes(

112 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider	98 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

113 // removing after this issue is resolved.

114 if (_is16BitCodeUnit()) {

115 return new String.fromCharCodes(codeunits);

116 } else {

117 return new String.fromCharCodes(

118 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));

119 }

120 }	99 }

121	100

122 /**	101 /**

123 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting	102 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting

124 * bytes with a big-endian byte-order-marker.	103 * bytes with a big-endian byte-order-marker.

125 */	104 */

126 List<int> encodeUtf16(String str) =>	105 List<int> encodeUtf16(String str) =>

127 encodeUtf16be(str, true);	106 encodeUtf16be(str, true);

128	107

129 /**	108 /**

(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
191 * little-endian byte-order marker (BOM).	170 * little-endian byte-order marker (BOM).

192 */	171 */

193 bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) {	172 bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) {

194 int end = length != null ? offset + length : utf16EncodedBytes.length;	173 int end = length != null ? offset + length : utf16EncodedBytes.length;

195 return (offset + 2) <= end &&	174 return (offset + 2) <= end &&

196 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&	175 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&

197 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI;	176 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI;

198 }	177 }

199	178

200 List<int> _stringToUtf16CodeUnits(String str) {	179 List<int> _stringToUtf16CodeUnits(String str) {

201 // TODO is16BitCodeUnit() is used to work around a bug with dart2js	180 return _codepointsToUtf16CodeUnits(str.charCodes);

202 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider

203 // removing after this issue is resolved.

204 if (_is16BitCodeUnit()) {

205 return str.charCodes;

206 } else {

207 return _codepointsToUtf16CodeUnits(str.charCodes);

208 }

209 }	181 }

210	182

211 typedef _ListRangeIterator _CodeUnitsProvider();	183 typedef _ListRangeIterator _CodeUnitsProvider();

212	184

213 /**	185 /**

214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type	186 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type

215 * provides an iterator on demand and the iterator will only translate bytes	187 * provides an iterator on demand and the iterator will only translate bytes

216 * as requested by the user of the iterator. (Note: results are not cached.)	188 * as requested by the user of the iterator. (Note: results are not cached.)

217 */	189 */

218 class IterableUtf16Decoder implements Iterable<int> {	190 class IterableUtf16Decoder implements Iterable<int> {

(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
343 skip();	315 skip();

344 }	316 }

345 }	317 }

346	318

347 int decode() {	319 int decode() {

348 int lo = utf16EncodedBytesIterator.next();	320 int lo = utf16EncodedBytesIterator.next();

349 int hi = utf16EncodedBytesIterator.next();	321 int hi = utf16EncodedBytesIterator.next();

350 return (hi << 8) + lo;	322 return (hi << 8) + lo;

351 }	323 }

352 }	324 }

OLD	NEW

« runtime/vm/object_test.cc ('K') | « sdk/lib/io/string_stream.dart ('k') | sdk/lib/utf/utf_core.dart » ('j') | no next file with comments »