Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(136)

Side by Side Diff: sdk/lib/utf/utf16.dart

Issue 11783009: Big merge from experimental to bleeding edge. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sdk/lib/uri/uri.dart ('k') | sdk/lib/utf/utf32.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.utf; 5 part of dart.utf;
6 6
7 /** 7 /**
8 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert 8 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert
9 * as much of the input as needed. Determines the byte order from the BOM, 9 * as much of the input as needed. Determines the byte order from the BOM,
10 * or uses big-endian as a default. This method always strips a leading BOM. 10 * or uses big-endian as a default. This method always strips a leading BOM.
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 List<int> encodeUtf16(String str) => 106 List<int> encodeUtf16(String str) =>
107 encodeUtf16be(str, true); 107 encodeUtf16be(str, true);
108 108
109 /** 109 /**
110 * Produce a list of UTF-16BE encoded bytes. By default, this method produces 110 * Produce a list of UTF-16BE encoded bytes. By default, this method produces
111 * UTF-16BE bytes with no BOM. 111 * UTF-16BE bytes with no BOM.
112 */ 112 */
113 List<int> encodeUtf16be(String str, [bool writeBOM = false]) { 113 List<int> encodeUtf16be(String str, [bool writeBOM = false]) {
114 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str); 114 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str);
115 List<int> encoding = 115 List<int> encoding =
116 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0)); 116 new List<int>.fixedLength(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0));
117 int i = 0; 117 int i = 0;
118 if (writeBOM) { 118 if (writeBOM) {
119 encoding[i++] = UNICODE_UTF_BOM_HI; 119 encoding[i++] = UNICODE_UTF_BOM_HI;
120 encoding[i++] = UNICODE_UTF_BOM_LO; 120 encoding[i++] = UNICODE_UTF_BOM_LO;
121 } 121 }
122 for (int unit in utf16CodeUnits) { 122 for (int unit in utf16CodeUnits) {
123 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8; 123 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;
124 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; 124 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;
125 } 125 }
126 return encoding; 126 return encoding;
127 } 127 }
128 128
129 /** 129 /**
130 * Produce a list of UTF-16LE encoded bytes. By default, this method produces 130 * Produce a list of UTF-16LE encoded bytes. By default, this method produces
131 * UTF-16LE bytes with no BOM. 131 * UTF-16LE bytes with no BOM.
132 */ 132 */
133 List<int> encodeUtf16le(String str, [bool writeBOM = false]) { 133 List<int> encodeUtf16le(String str, [bool writeBOM = false]) {
134 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str); 134 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str);
135 List<int> encoding = 135 List<int> encoding =
136 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0)); 136 new List<int>.fixedLength(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0));
137 int i = 0; 137 int i = 0;
138 if (writeBOM) { 138 if (writeBOM) {
139 encoding[i++] = UNICODE_UTF_BOM_LO; 139 encoding[i++] = UNICODE_UTF_BOM_LO;
140 encoding[i++] = UNICODE_UTF_BOM_HI; 140 encoding[i++] = UNICODE_UTF_BOM_HI;
141 } 141 }
142 for (int unit in utf16CodeUnits) { 142 for (int unit in utf16CodeUnits) {
143 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; 143 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;
144 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8; 144 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;
145 } 145 }
146 return encoding; 146 return encoding;
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
181 return _codepointsToUtf16CodeUnits(str.charCodes); 181 return _codepointsToUtf16CodeUnits(str.charCodes);
182 } 182 }
183 183
184 typedef _ListRangeIterator _CodeUnitsProvider(); 184 typedef _ListRangeIterator _CodeUnitsProvider();
185 185
186 /** 186 /**
187 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type 187 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type
188 * provides an iterator on demand and the iterator will only translate bytes 188 * provides an iterator on demand and the iterator will only translate bytes
189 * as requested by the user of the iterator. (Note: results are not cached.) 189 * as requested by the user of the iterator. (Note: results are not cached.)
190 */ 190 */
191 class IterableUtf16Decoder implements Iterable<int> { 191 // TODO(floitsch): Consider removing the extend and switch to implements since
192 // that's cheaper to allocate.
193 class IterableUtf16Decoder extends Iterable<int> {
192 final _CodeUnitsProvider codeunitsProvider; 194 final _CodeUnitsProvider codeunitsProvider;
193 final int replacementCodepoint; 195 final int replacementCodepoint;
194 196
195 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint); 197 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);
196 198
197 Utf16CodeUnitDecoder iterator() => 199 Utf16CodeUnitDecoder get iterator =>
198 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(), 200 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(),
199 replacementCodepoint); 201 replacementCodepoint);
200 } 202 }
201 203
202 /** 204 /**
203 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes 205 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes
204 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine 206 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine
205 * endian-ness, and defaults to BE. 207 * endian-ness, and defaults to BE.
206 */ 208 */
207 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator { 209 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {
208 final _ListRangeIterator utf16EncodedBytesIterator; 210 final _ListRangeIterator utf16EncodedBytesIterator;
209 final int replacementCodepoint; 211 final int replacementCodepoint;
212 int _current = null;
210 213
211 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator( 214 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(
212 this.utf16EncodedBytesIterator, this.replacementCodepoint); 215 this.utf16EncodedBytesIterator, this.replacementCodepoint);
213 216
214 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ 217 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
215 int offset = 0, int length, 218 int offset = 0, int length,
216 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 219 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
217 if (length == null) { 220 if (length == null) {
218 length = utf16EncodedBytes.length - offset; 221 length = utf16EncodedBytes.length - offset;
219 } 222 }
220 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) { 223 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) {
221 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2, 224 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,
222 length - 2, false, replacementCodepoint); 225 length - 2, false, replacementCodepoint);
223 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) { 226 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) {
224 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2, 227 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,
225 length - 2, false, replacementCodepoint); 228 length - 2, false, replacementCodepoint);
226 } else { 229 } else {
227 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset, 230 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset,
228 length, false, replacementCodepoint); 231 length, false, replacementCodepoint);
229 } 232 }
230 } 233 }
231 234
232 /** 235 /**
233 * Provides a fast way to decode the rest of the source bytes in a single 236 * Provides a fast way to decode the rest of the source bytes in a single
234 * call. This method trades memory for improved speed in that it potentially 237 * call. This method trades memory for improved speed in that it potentially
235 * over-allocates the List containing results. 238 * over-allocates the List containing results.
236 */ 239 */
237 List<int> decodeRest() { 240 List<int> decodeRest() {
238 List<int> codeunits = new List<int>(remaining); 241 List<int> codeunits = new List<int>.fixedLength(remaining);
239 int i = 0; 242 int i = 0;
240 while (hasNext) { 243 while (moveNext()) {
241 codeunits[i++] = next(); 244 codeunits[i++] = current;
242 } 245 }
243 if (i == codeunits.length) { 246 if (i == codeunits.length) {
244 return codeunits; 247 return codeunits;
245 } else { 248 } else {
246 List<int> truncCodeunits = new List<int>(i); 249 List<int> truncCodeunits = new List<int>.fixedLength(i);
247 truncCodeunits.setRange(0, i, codeunits); 250 truncCodeunits.setRange(0, i, codeunits);
248 return truncCodeunits; 251 return truncCodeunits;
249 } 252 }
250 } 253 }
251 254
252 bool get hasNext => utf16EncodedBytesIterator.hasNext; 255 int get current => _current;
253 256
254 int next() { 257 bool moveNext() {
258 _current = null;
255 if (utf16EncodedBytesIterator.remaining < 2) { 259 if (utf16EncodedBytesIterator.remaining < 2) {
256 utf16EncodedBytesIterator.next(); 260 utf16EncodedBytesIterator.moveNext();
257 if (replacementCodepoint != null) { 261 if (replacementCodepoint != null) {
258 return replacementCodepoint; 262 _current = replacementCodepoint;
263 return true;
259 } else { 264 } else {
260 throw new ArgumentError( 265 throw new ArgumentError(
261 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}"); 266 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}");
262 } 267 }
263 } else { 268 } else {
264 return decode(); 269 _current = decode();
270 return true;
265 } 271 }
266 } 272 }
267 273
268 int get position => utf16EncodedBytesIterator.position ~/ 2; 274 int get position => utf16EncodedBytesIterator.position ~/ 2;
269 275
270 void backup([int by = 1]) { 276 void backup([int by = 1]) {
271 utf16EncodedBytesIterator.backup(2 * by); 277 utf16EncodedBytesIterator.backup(2 * by);
272 } 278 }
273 279
274 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2; 280 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2;
275 281
276 void skip([int count = 1]) { 282 void skip([int count = 1]) {
277 utf16EncodedBytesIterator.skip(2 * count); 283 utf16EncodedBytesIterator.skip(2 * count);
278 } 284 }
279 285
280 int decode(); 286 int decode();
281 } 287 }
282 288
283 /** 289 /**
284 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes 290 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes
285 * to produce the code unit (0-(2^16)-1). 291 * to produce the code unit (0-(2^16)-1).
286 */ 292 */
287 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { 293 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
288 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ 294 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
289 int offset = 0, int length, bool stripBom = true, 295 int offset = 0, int length, bool stripBom = true,
290 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 296 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
291 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset, 297 super._fromListRangeIterator(
292 length)).iterator(), replacementCodepoint) { 298 (new _ListRange(utf16EncodedBytes, offset, length)).iterator,
299 replacementCodepoint) {
293 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) { 300 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {
294 skip(); 301 skip();
295 } 302 }
296 } 303 }
297 304
298 int decode() { 305 int decode() {
299 int hi = utf16EncodedBytesIterator.next(); 306 utf16EncodedBytesIterator.moveNext();
300 int lo = utf16EncodedBytesIterator.next(); 307 int hi = utf16EncodedBytesIterator.current;
308 utf16EncodedBytesIterator.moveNext();
309 int lo = utf16EncodedBytesIterator.current;
301 return (hi << 8) + lo; 310 return (hi << 8) + lo;
302 } 311 }
303 } 312 }
304 313
305 /** 314 /**
306 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes 315 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes
307 * to produce the code unit (0-(2^16)-1). 316 * to produce the code unit (0-(2^16)-1).
308 */ 317 */
309 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { 318 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
310 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ 319 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
311 int offset = 0, int length, bool stripBom = true, 320 int offset = 0, int length, bool stripBom = true,
312 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 321 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
313 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset, 322 super._fromListRangeIterator(
314 length)).iterator(), replacementCodepoint) { 323 (new _ListRange(utf16EncodedBytes, offset, length)).iterator,
324 replacementCodepoint) {
315 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) { 325 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {
316 skip(); 326 skip();
317 } 327 }
318 } 328 }
319 329
320 int decode() { 330 int decode() {
321 int lo = utf16EncodedBytesIterator.next(); 331 utf16EncodedBytesIterator.moveNext();
322 int hi = utf16EncodedBytesIterator.next(); 332 int lo = utf16EncodedBytesIterator.current;
333 utf16EncodedBytesIterator.moveNext();
334 int hi = utf16EncodedBytesIterator.current;
323 return (hi << 8) + lo; 335 return (hi << 8) + lo;
324 } 336 }
325 } 337 }
OLD | NEW
« no previous file with comments | « sdk/lib/uri/uri.dart ('k') | sdk/lib/utf/utf32.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698