sdk/lib/utf/utf16.dart - Issue 11410086: Use iterator, moveNext(), current.

Side by Side Diff: sdk/lib/utf/utf16.dart

Issue 11410086: Use iterator, moveNext(), current. (Closed) Base URL: https://dart.googlecode.com/svn/experimental/lib_v2/dart

Patch Set: Address comments. Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5	5

6 /**	6 /**

7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert	7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert

8 * as much of the input as needed. Determines the byte order from the BOM,	8 * as much of the input as needed. Determines the byte order from the BOM,

9 * or uses big-endian as a default. This method always strips a leading BOM.	9 * or uses big-endian as a default. This method always strips a leading BOM.

10 * Set the [replacementCodepoint] to null to throw an ArgumentError	10 * Set the [replacementCodepoint] to null to throw an ArgumentError

(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type	214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type

215 * provides an iterator on demand and the iterator will only translate bytes	215 * provides an iterator on demand and the iterator will only translate bytes

216 * as requested by the user of the iterator. (Note: results are not cached.)	216 * as requested by the user of the iterator. (Note: results are not cached.)

217 */	217 */

218 class IterableUtf16Decoder extends Iterable<int> {	218 class IterableUtf16Decoder extends Iterable<int> {

219 final _CodeUnitsProvider codeunitsProvider;	219 final _CodeUnitsProvider codeunitsProvider;

220 final int replacementCodepoint;	220 final int replacementCodepoint;

221	221

222 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);	222 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);

223	223

224 Utf16CodeUnitDecoder iterator() =>	224 Utf16CodeUnitDecoder get iterator =>

225 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(),	225 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(),

226 replacementCodepoint);	226 replacementCodepoint);

227 }	227 }

228	228

229 /**	229 /**

230 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes	230 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes

231 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine	231 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine

232 * endian-ness, and defaults to BE.	232 * endian-ness, and defaults to BE.

233 */	233 */

234 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {	234 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {

235 final _ListRangeIterator utf16EncodedBytesIterator;	235 final _ListRangeIterator utf16EncodedBytesIterator;

236 final int replacementCodepoint;	236 final int replacementCodepoint;

	237 int _current = -1;

237	238

238 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(	239 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(

239 this.utf16EncodedBytesIterator, this.replacementCodepoint);	240 this.utf16EncodedBytesIterator, this.replacementCodepoint);

240	241

241 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [	242 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [

242 int offset = 0, int length,	243 int offset = 0, int length,

243 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	244 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

244 if (length == null) {	245 if (length == null) {

245 length = utf16EncodedBytes.length - offset;	246 length = utf16EncodedBytes.length - offset;

246 }	247 }

(...skipping 10 matching lines...) Expand all
257 }	258 }

258	259

259 /**	260 /**

260 * Provides a fast way to decode the rest of the source bytes in a single	261 * Provides a fast way to decode the rest of the source bytes in a single

261 * call. This method trades memory for improved speed in that it potentially	262 * call. This method trades memory for improved speed in that it potentially

262 * over-allocates the List containing results.	263 * over-allocates the List containing results.

263 */	264 */

264 List<int> decodeRest() {	265 List<int> decodeRest() {

265 List<int> codeunits = new List<int>(remaining);	266 List<int> codeunits = new List<int>(remaining);

266 int i = 0;	267 int i = 0;

267 while (hasNext) {	268 while (moveNext()) {

268 codeunits[i++] = next();	269 codeunits[i++] = current;

269 }	270 }

270 if (i == codeunits.length) {	271 if (i == codeunits.length) {

271 return codeunits;	272 return codeunits;

272 } else {	273 } else {

273 List<int> truncCodeunits = new List<int>(i);	274 List<int> truncCodeunits = new List<int>(i);

274 truncCodeunits.setRange(0, i, codeunits);	275 truncCodeunits.setRange(0, i, codeunits);

275 return truncCodeunits;	276 return truncCodeunits;

276 }	277 }

277 }	278 }

278	279

279 bool get hasNext => utf16EncodedBytesIterator.hasNext;	280 int get current {

	281 if (_current == -1) {

	282 // TODO(floitsch): bad error message.

	283 throw new StateError("No more elements");

	284 }

	285 }

280	286

281 int next() {	287 bool moveNext() {

	288 _current = -1;

282 if (utf16EncodedBytesIterator.remaining < 2) {	289 if (utf16EncodedBytesIterator.remaining < 2) {

283 utf16EncodedBytesIterator.next();	290 utf16EncodedBytesIterator.moveNext();

284 if (replacementCodepoint != null) {	291 if (replacementCodepoint != null) {

285 return replacementCodepoint;	292 _current = replacementCodepoint;

	293 return true;

286 } else {	294 } else {

287 throw new ArgumentError(	295 throw new ArgumentError(

288 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}");	296 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}");

289 }	297 }

290 } else {	298 } else {

291 return decode();	299 _current = decode();

	300 return true;

292 }	301 }

293 }	302 }

294	303

295 int get position => utf16EncodedBytesIterator.position ~/ 2;	304 int get position => utf16EncodedBytesIterator.position ~/ 2;

296	305

297 void backup([int by = 1]) {	306 void backup([int by = 1]) {

298 utf16EncodedBytesIterator.backup(2 * by);	307 utf16EncodedBytesIterator.backup(2 * by);

299 }	308 }

300	309

301 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2;	310 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2;

302	311

303 void skip([int count = 1]) {	312 void skip([int count = 1]) {

304 utf16EncodedBytesIterator.skip(2 * count);	313 utf16EncodedBytesIterator.skip(2 * count);

305 }	314 }

306	315

307 abstract int decode();	316 abstract int decode();

308 }	317 }

309	318

310 /**	319 /**

311 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes	320 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes

312 * to produce the code unit (0-(2^16)-1).	321 * to produce the code unit (0-(2^16)-1).

313 */	322 */

314 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {	323 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {

315 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [	324 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [

316 int offset = 0, int length, bool stripBom = true,	325 int offset = 0, int length, bool stripBom = true,

317 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	326 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :

318 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset,	327 super._fromListRangeIterator(

319 length)).iterator(), replacementCodepoint) {	328 (new _ListRange(utf16EncodedBytes, offset, length)).iterator,

	329 replacementCodepoint) {

320 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {	330 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {

321 skip();	331 skip();

322 }	332 }

323 }	333 }

324	334

325 int decode() {	335 int decode() {

326 int hi = utf16EncodedBytesIterator.next();	336 utf16EncodedBytesIterator.moveNext();

327 int lo = utf16EncodedBytesIterator.next();	337 int hi = utf16EncodedBytesIterator.current;

	338 utf16EncodedBytesIterator.moveNext();

	339 int lo = utf16EncodedBytesIterator.current;

328 return (hi << 8) + lo;	340 return (hi << 8) + lo;

329 }	341 }

330 }	342 }

331	343

332 /**	344 /**

333 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes	345 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes

334 * to produce the code unit (0-(2^16)-1).	346 * to produce the code unit (0-(2^16)-1).

335 */	347 */

336 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {	348 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {

337 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [	349 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [

338 int offset = 0, int length, bool stripBom = true,	350 int offset = 0, int length, bool stripBom = true,

339 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	351 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :

340 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset,	352 super._fromListRangeIterator(

341 length)).iterator(), replacementCodepoint) {	353 (new _ListRange(utf16EncodedBytes, offset, length)).iterator,

	354 replacementCodepoint) {

342 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {	355 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {

343 skip();	356 skip();

344 }	357 }

345 }	358 }

346	359

347 int decode() {	360 int decode() {

348 int lo = utf16EncodedBytesIterator.next();	361 utf16EncodedBytesIterator.moveNext();

349 int hi = utf16EncodedBytesIterator.next();	362 int lo = utf16EncodedBytesIterator.current;

	363 utf16EncodedBytesIterator.moveNext();

	364 int hi = utf16EncodedBytesIterator.current;

350 return (hi << 8) + lo;	365 return (hi << 8) + lo;

351 }	366 }

352 }	367 }

OLD	NEW

« runtime/vm/intrinsifier.h ('K') | « sdk/lib/html/templates/immutable_list_mixin.darttemplate ('k') | sdk/lib/utf/utf32.dart » ('j') | no next file with comments »