Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(736)

Side by Side Diff: sdk/lib/utf/utf16.dart

Issue 11418115: Fix Unicode issues in dart2js and dart2dart. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Remove accidental test expectation dupe Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 5
6 /** 6 /**
7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can convert only 7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can convert only
8 * as much of the input as needed. Determines the byte order from the BOM, 8 * as much of the input as needed. Determines the byte order from the BOM,
9 * or uses big-endian as a default. This method always strips a leading BOM. 9 * or uses big-endian as a default. This method always strips a leading BOM.
10 * Set the [replacementCodepoint] to null to throw an ArgumentError 10 * Set the [replacementCodepoint] to null to throw an ArgumentError
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always 55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always
56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an 56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an
57 * ArgumentError rather than replace the bad value. The default 57 * ArgumentError rather than replace the bad value. The default
58 * value for the [replacementCodepoint] is U+FFFD. 58 * value for the [replacementCodepoint] is U+FFFD.
59 */ 59 */
60 String decodeUtf16(List<int> bytes, [int offset = 0, int length, 60 String decodeUtf16(List<int> bytes, [int offset = 0, int length,
61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, 62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes,
63 offset, length, replacementCodepoint); 63 offset, length, replacementCodepoint);
64 List<int> codeunits = decoder.decodeRest(); 64 List<int> codeunits = decoder.decodeRest();
65 // TODO is16BitCodeUnit() is used to work around a bug with dart2js 65 return new String.fromCharCodes(
66 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider 66 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
67 // removing after this issue is resolved.
68 if (_is16BitCodeUnit()) {
69 return new String.fromCharCodes(codeunits);
70 } else {
71 return new String.fromCharCodes(
72 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
73 }
74 } 67 }
75 68
76 /** 69 /**
77 * Produce a String from a sequence of UTF-16BE encoded bytes. This method 70 * Produce a String from a sequence of UTF-16BE encoded bytes. This method
78 * strips a leading BOM by default, but can be overridden by setting the 71 * strips a leading BOM by default, but can be overridden by setting the
79 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to 72 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to
80 * null to throw an ArgumentError rather than replace the bad value. 73 * null to throw an ArgumentError rather than replace the bad value.
81 * The default value for the [replacementCodepoint] is U+FFFD. 74 * The default value for the [replacementCodepoint] is U+FFFD.
82 */ 75 */
83 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, 76 String decodeUtf16be(List<int> bytes, [int offset = 0, int length,
84 bool stripBom = true, 77 bool stripBom = true,
85 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 78 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
86 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, 79 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset,
87 length, stripBom, replacementCodepoint)).decodeRest(); 80 length, stripBom, replacementCodepoint)).decodeRest();
88 // TODO is16BitCodeUnit() is used to work around a bug with dart2js 81 return new String.fromCharCodes(
89 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider 82 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
90 // removing after this issue is resolved.
91 if (_is16BitCodeUnit()) {
92 return new String.fromCharCodes(codeunits);
93 } else {
94 return new String.fromCharCodes(
95 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
96 }
97 } 83 }
98 84
99 /** 85 /**
100 * Produce a String from a sequence of UTF-16LE encoded bytes. This method 86 * Produce a String from a sequence of UTF-16LE encoded bytes. This method
101 * strips a leading BOM by default, but can be overridden by setting the 87 * strips a leading BOM by default, but can be overridden by setting the
102 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to 88 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to
103 * null to throw an ArgumentError rather than replace the bad value. 89 * null to throw an ArgumentError rather than replace the bad value.
104 * The default value for the [replacementCodepoint] is U+FFFD. 90 * The default value for the [replacementCodepoint] is U+FFFD.
105 */ 91 */
106 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, 92 String decodeUtf16le(List<int> bytes, [int offset = 0, int length,
107 bool stripBom = true, 93 bool stripBom = true,
108 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 94 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
109 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, 95 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset,
110 length, stripBom, replacementCodepoint)).decodeRest(); 96 length, stripBom, replacementCodepoint)).decodeRest();
111 // TODO is16BitCodeUnit() is used to work around a bug with dart2js 97 return new String.fromCharCodes(
112 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider 98 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
113 // removing after this issue is resolved.
114 if (_is16BitCodeUnit()) {
115 return new String.fromCharCodes(codeunits);
116 } else {
117 return new String.fromCharCodes(
118 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
119 }
120 } 99 }
121 100
122 /** 101 /**
123 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting 102 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting
124 * bytes with a big-endian byte-order-marker. 103 * bytes with a big-endian byte-order-marker.
125 */ 104 */
126 List<int> encodeUtf16(String str) => 105 List<int> encodeUtf16(String str) =>
127 encodeUtf16be(str, true); 106 encodeUtf16be(str, true);
128 107
129 /** 108 /**
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
191 * little-endian byte-order marker (BOM). 170 * little-endian byte-order marker (BOM).
192 */ 171 */
193 bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) { 172 bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) {
194 int end = length != null ? offset + length : utf16EncodedBytes.length; 173 int end = length != null ? offset + length : utf16EncodedBytes.length;
195 return (offset + 2) <= end && 174 return (offset + 2) <= end &&
196 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO && 175 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&
197 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI; 176 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI;
198 } 177 }
199 178
200 List<int> _stringToUtf16CodeUnits(String str) { 179 List<int> _stringToUtf16CodeUnits(String str) {
201 // TODO is16BitCodeUnit() is used to work around a bug with dart2js 180 return _codepointsToUtf16CodeUnits(str.charCodes);
202 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider
203 // removing after this issue is resolved.
204 if (_is16BitCodeUnit()) {
205 return str.charCodes;
206 } else {
207 return _codepointsToUtf16CodeUnits(str.charCodes);
208 }
209 } 181 }
210 182
211 typedef _ListRangeIterator _CodeUnitsProvider(); 183 typedef _ListRangeIterator _CodeUnitsProvider();
212 184
213 /** 185 /**
214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type 186 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type
215 * provides an iterator on demand and the iterator will only translate bytes 187 * provides an iterator on demand and the iterator will only translate bytes
216 * as requested by the user of the iterator. (Note: results are not cached.) 188 * as requested by the user of the iterator. (Note: results are not cached.)
217 */ 189 */
218 class IterableUtf16Decoder implements Iterable<int> { 190 class IterableUtf16Decoder implements Iterable<int> {
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
343 skip(); 315 skip();
344 } 316 }
345 } 317 }
346 318
347 int decode() { 319 int decode() {
348 int lo = utf16EncodedBytesIterator.next(); 320 int lo = utf16EncodedBytesIterator.next();
349 int hi = utf16EncodedBytesIterator.next(); 321 int hi = utf16EncodedBytesIterator.next();
350 return (hi << 8) + lo; 322 return (hi << 8) + lo;
351 } 323 }
352 } 324 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698