Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(559)

Side by Side Diff: sdk/lib/utf/utf16.dart

Issue 11410086: Use iterator, moveNext(), current. (Closed) Base URL: https://dart.googlecode.com/svn/experimental/lib_v2/dart
Patch Set: Address comments. Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 5
6 /** 6 /**
7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can convert only 7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can convert only
8 * as much of the input as needed. Determines the byte order from the BOM, 8 * as much of the input as needed. Determines the byte order from the BOM,
9 * or uses big-endian as a default. This method always strips a leading BOM. 9 * or uses big-endian as a default. This method always strips a leading BOM.
10 * Set the [replacementCodepoint] to null to throw an ArgumentError 10 * Set the [replacementCodepoint] to null to throw an ArgumentError
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type 214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type
215 * provides an iterator on demand and the iterator will only translate bytes 215 * provides an iterator on demand and the iterator will only translate bytes
216 * as requested by the user of the iterator. (Note: results are not cached.) 216 * as requested by the user of the iterator. (Note: results are not cached.)
217 */ 217 */
218 class IterableUtf16Decoder extends Iterable<int> { 218 class IterableUtf16Decoder extends Iterable<int> {
219 final _CodeUnitsProvider codeunitsProvider; 219 final _CodeUnitsProvider codeunitsProvider;
220 final int replacementCodepoint; 220 final int replacementCodepoint;
221 221
222 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint); 222 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);
223 223
224 Utf16CodeUnitDecoder iterator() => 224 Utf16CodeUnitDecoder get iterator =>
225 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(), 225 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(),
226 replacementCodepoint); 226 replacementCodepoint);
227 } 227 }
228 228
229 /** 229 /**
230 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes 230 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes
231 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine 231 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine
232 * endian-ness, and defaults to BE. 232 * endian-ness, and defaults to BE.
233 */ 233 */
234 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator { 234 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {
235 final _ListRangeIterator utf16EncodedBytesIterator; 235 final _ListRangeIterator utf16EncodedBytesIterator;
236 final int replacementCodepoint; 236 final int replacementCodepoint;
237 int _current = -1;
237 238
238 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator( 239 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(
239 this.utf16EncodedBytesIterator, this.replacementCodepoint); 240 this.utf16EncodedBytesIterator, this.replacementCodepoint);
240 241
241 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ 242 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
242 int offset = 0, int length, 243 int offset = 0, int length,
243 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 244 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
244 if (length == null) { 245 if (length == null) {
245 length = utf16EncodedBytes.length - offset; 246 length = utf16EncodedBytes.length - offset;
246 } 247 }
(...skipping 10 matching lines...) Expand all
257 } 258 }
258 259
259 /** 260 /**
260 * Provides a fast way to decode the rest of the source bytes in a single 261 * Provides a fast way to decode the rest of the source bytes in a single
261 * call. This method trades memory for improved speed in that it potentially 262 * call. This method trades memory for improved speed in that it potentially
262 * over-allocates the List containing results. 263 * over-allocates the List containing results.
263 */ 264 */
264 List<int> decodeRest() { 265 List<int> decodeRest() {
265 List<int> codeunits = new List<int>(remaining); 266 List<int> codeunits = new List<int>(remaining);
266 int i = 0; 267 int i = 0;
267 while (hasNext) { 268 while (moveNext()) {
268 codeunits[i++] = next(); 269 codeunits[i++] = current;
269 } 270 }
270 if (i == codeunits.length) { 271 if (i == codeunits.length) {
271 return codeunits; 272 return codeunits;
272 } else { 273 } else {
273 List<int> truncCodeunits = new List<int>(i); 274 List<int> truncCodeunits = new List<int>(i);
274 truncCodeunits.setRange(0, i, codeunits); 275 truncCodeunits.setRange(0, i, codeunits);
275 return truncCodeunits; 276 return truncCodeunits;
276 } 277 }
277 } 278 }
278 279
279 bool get hasNext => utf16EncodedBytesIterator.hasNext; 280 int get current {
281 if (_current == -1) {
282 // TODO(floitsch): bad error message.
283 throw new StateError("No more elements");
284 }
285 }
280 286
281 int next() { 287 bool moveNext() {
288 _current = -1;
282 if (utf16EncodedBytesIterator.remaining < 2) { 289 if (utf16EncodedBytesIterator.remaining < 2) {
283 utf16EncodedBytesIterator.next(); 290 utf16EncodedBytesIterator.moveNext();
284 if (replacementCodepoint != null) { 291 if (replacementCodepoint != null) {
285 return replacementCodepoint; 292 _current = replacementCodepoint;
293 return true;
286 } else { 294 } else {
287 throw new ArgumentError( 295 throw new ArgumentError(
288 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}"); 296 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}");
289 } 297 }
290 } else { 298 } else {
291 return decode(); 299 _current = decode();
300 return true;
292 } 301 }
293 } 302 }
294 303
295 int get position => utf16EncodedBytesIterator.position ~/ 2; 304 int get position => utf16EncodedBytesIterator.position ~/ 2;
296 305
297 void backup([int by = 1]) { 306 void backup([int by = 1]) {
298 utf16EncodedBytesIterator.backup(2 * by); 307 utf16EncodedBytesIterator.backup(2 * by);
299 } 308 }
300 309
301 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2; 310 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2;
302 311
303 void skip([int count = 1]) { 312 void skip([int count = 1]) {
304 utf16EncodedBytesIterator.skip(2 * count); 313 utf16EncodedBytesIterator.skip(2 * count);
305 } 314 }
306 315
307 abstract int decode(); 316 abstract int decode();
308 } 317 }
309 318
310 /** 319 /**
311 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes 320 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes
312 * to produce the code unit (0-(2^16)-1). 321 * to produce the code unit (0-(2^16)-1).
313 */ 322 */
314 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { 323 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
315 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ 324 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
316 int offset = 0, int length, bool stripBom = true, 325 int offset = 0, int length, bool stripBom = true,
317 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 326 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
318 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset, 327 super._fromListRangeIterator(
319 length)).iterator(), replacementCodepoint) { 328 (new _ListRange(utf16EncodedBytes, offset, length)).iterator,
329 replacementCodepoint) {
320 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) { 330 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {
321 skip(); 331 skip();
322 } 332 }
323 } 333 }
324 334
325 int decode() { 335 int decode() {
326 int hi = utf16EncodedBytesIterator.next(); 336 utf16EncodedBytesIterator.moveNext();
327 int lo = utf16EncodedBytesIterator.next(); 337 int hi = utf16EncodedBytesIterator.current;
338 utf16EncodedBytesIterator.moveNext();
339 int lo = utf16EncodedBytesIterator.current;
328 return (hi << 8) + lo; 340 return (hi << 8) + lo;
329 } 341 }
330 } 342 }
331 343
332 /** 344 /**
333 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes 345 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes
334 * to produce the code unit (0-(2^16)-1). 346 * to produce the code unit (0-(2^16)-1).
335 */ 347 */
336 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { 348 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
337 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ 349 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
338 int offset = 0, int length, bool stripBom = true, 350 int offset = 0, int length, bool stripBom = true,
339 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 351 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
340 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset, 352 super._fromListRangeIterator(
341 length)).iterator(), replacementCodepoint) { 353 (new _ListRange(utf16EncodedBytes, offset, length)).iterator,
354 replacementCodepoint) {
342 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) { 355 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {
343 skip(); 356 skip();
344 } 357 }
345 } 358 }
346 359
347 int decode() { 360 int decode() {
348 int lo = utf16EncodedBytesIterator.next(); 361 utf16EncodedBytesIterator.moveNext();
349 int hi = utf16EncodedBytesIterator.next(); 362 int lo = utf16EncodedBytesIterator.current;
363 utf16EncodedBytesIterator.moveNext();
364 int hi = utf16EncodedBytesIterator.current;
350 return (hi << 8) + lo; 365 return (hi << 8) + lo;
351 } 366 }
352 } 367 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698