sdk/lib/utf/utf32.dart - Issue 11410086: Use iterator, moveNext(), current.

Side by Side Diff: sdk/lib/utf/utf32.dart

Issue 11410086: Use iterator, moveNext(), current. (Closed) Base URL: https://dart.googlecode.com/svn/experimental/lib_v2/dart

Patch Set: Address comments. Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 /**	5 /**

6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can convert only	6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can convert only

7 * as much of the input as needed. Determines the byte order from the BOM,	7 * as much of the input as needed. Determines the byte order from the BOM,

8 * or uses big-endian as a default. This method always strips a leading BOM.	8 * or uses big-endian as a default. This method always strips a leading BOM.

9 * Set the replacementCharacter to null to throw an ArgumentError	9 * Set the replacementCharacter to null to throw an ArgumentError

10 * rather than replace the bad value.	10 * rather than replace the bad value.

(...skipping 167 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
178 /**	178 /**

179 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type	179 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type

180 * provides an iterator on demand and the iterator will only translate bytes	180 * provides an iterator on demand and the iterator will only translate bytes

181 * as requested by the user of the iterator. (Note: results are not cached.)	181 * as requested by the user of the iterator. (Note: results are not cached.)

182 */	182 */

183 class IterableUtf32Decoder extends Iterable<int> {	183 class IterableUtf32Decoder extends Iterable<int> {

184 final Utf32BytesDecoderProvider codeunitsProvider;	184 final Utf32BytesDecoderProvider codeunitsProvider;

185	185

186 IterableUtf32Decoder._(this.codeunitsProvider);	186 IterableUtf32Decoder._(this.codeunitsProvider);

187	187

188 Utf32BytesDecoder iterator() => codeunitsProvider();	188 Utf32BytesDecoder get iterator => codeunitsProvider();

189 }	189 }

190	190

191 /**	191 /**

192 * Abstract parent class that converts encoded bytes to codepoints.	192 * Abstract parent class that converts encoded bytes to codepoints.

193 */	193 */

194 class Utf32BytesDecoder implements _ListRangeIterator {	194 class Utf32BytesDecoder implements _ListRangeIterator {

195 final _ListRangeIterator utf32EncodedBytesIterator;	195 final _ListRangeIterator utf32EncodedBytesIterator;

196 final int replacementCodepoint;	196 final int replacementCodepoint;

	197 int _current = -1;

197	198

198 Utf32BytesDecoder._fromListRangeIterator(	199 Utf32BytesDecoder._fromListRangeIterator(

199 this.utf32EncodedBytesIterator, this.replacementCodepoint);	200 this.utf32EncodedBytesIterator, this.replacementCodepoint);

200	201

201 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [	202 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [

202 int offset = 0, int length,	203 int offset = 0, int length,

203 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	204 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

204 if (length == null) {	205 if (length == null) {

205 length = utf32EncodedBytes.length - offset;	206 length = utf32EncodedBytes.length - offset;

206 }	207 }

207 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {	208 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {

208 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,	209 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,

209 false, replacementCodepoint);	210 false, replacementCodepoint);

210 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) {	211 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) {

211 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,	212 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,

212 false, replacementCodepoint);	213 false, replacementCodepoint);

213 } else {	214 } else {

214 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false,	215 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false,

215 replacementCodepoint);	216 replacementCodepoint);

216 }	217 }

217 }	218 }

218	219

219 List<int> decodeRest() {	220 List<int> decodeRest() {

220 List<int> codeunits = new List<int>(remaining);	221 List<int> codeunits = new List<int>(remaining);

221 int i = 0;	222 int i = 0;

222 while (hasNext) {	223 while (moveNext()) {

223 codeunits[i++] = next();	224 codeunits[i++] = current;

224 }	225 }

225 return codeunits;	226 return codeunits;

226 }	227 }

227	228

228 bool get hasNext => utf32EncodedBytesIterator.hasNext;	229 int get current {

	230 if (_current == -1) {

	231 // TODO(floitsch): bad error message.

	232 throw new StateError("No more elements");

	233 }

	234 return _current;

	235 }

229	236

230 int next() {	237 bool moveNext() {

	238 _current = -1;

231 if (utf32EncodedBytesIterator.remaining < 4) {	239 if (utf32EncodedBytesIterator.remaining < 4) {

232 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining);	240 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining);

233 if (replacementCodepoint != null) {	241 if (replacementCodepoint != null) {

234 return replacementCodepoint;	242 _current = replacementCodepoint;

	243 return true;

235 } else {	244 } else {

236 throw new ArgumentError(	245 throw new ArgumentError(

237 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");	246 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");

238 }	247 }

239 } else {	248 } else {

240 int codepoint = decode();	249 int codepoint = decode();

241 if (_validCodepoint(codepoint)) {	250 if (_validCodepoint(codepoint)) {

242 return codepoint;	251 _current = codepoint;

	252 return true;

243 } else if (replacementCodepoint != null) {	253 } else if (replacementCodepoint != null) {

244 return replacementCodepoint;	254 _current = replacementCodepoint;

	255 return true;

245 } else {	256 } else {

246 throw new ArgumentError(	257 throw new ArgumentError(

247 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");	258 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");

248 }	259 }

249 }	260 }

250 }	261 }

251	262

252 int get position => utf32EncodedBytesIterator.position ~/ 4;	263 int get position => utf32EncodedBytesIterator.position ~/ 4;

253	264

254 void backup([int by = 1]) {	265 void backup([int by = 1]) {

(...skipping 10 matching lines...) Expand all Loading...
265 }	276 }

266	277

267 /**	278 /**

268 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes	279 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes

269 * to produce the unicode codepoint.	280 * to produce the unicode codepoint.

270 */	281 */

271 class Utf32beBytesDecoder extends Utf32BytesDecoder {	282 class Utf32beBytesDecoder extends Utf32BytesDecoder {

272 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,	283 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,

273 int length, bool stripBom = true,	284 int length, bool stripBom = true,

274 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	285 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :

275 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset,	286 super._fromListRangeIterator(

276 length)).iterator(), replacementCodepoint) {	287 (new _ListRange(utf32EncodedBytes, offset, length)).iterator,

	288 replacementCodepoint) {

277 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) {	289 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) {

278 skip();	290 skip();

279 }	291 }

280 }	292 }

281	293

282 int decode() {	294 int decode() {

283 int value = utf32EncodedBytesIterator.next();	295 utf32EncodedBytesIterator.moveNext();

284 value = (value << 8) + utf32EncodedBytesIterator.next();	296 int value = utf32EncodedBytesIterator.current;

285 value = (value << 8) + utf32EncodedBytesIterator.next();	297 utf32EncodedBytesIterator.moveNext();

286 value = (value << 8) + utf32EncodedBytesIterator.next();	298 value = (value << 8) + utf32EncodedBytesIterator.current;

	299 utf32EncodedBytesIterator.moveNext();

	300 value = (value << 8) + utf32EncodedBytesIterator.current;

	301 utf32EncodedBytesIterator.moveNext();

	302 value = (value << 8) + utf32EncodedBytesIterator.current;

287 return value;	303 return value;

288 }	304 }

289 }	305 }

290	306

291 /**	307 /**

292 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes	308 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes

293 * to produce the unicode codepoint.	309 * to produce the unicode codepoint.

294 */	310 */

295 class Utf32leBytesDecoder extends Utf32BytesDecoder {	311 class Utf32leBytesDecoder extends Utf32BytesDecoder {

296 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,	312 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,

297 int length, bool stripBom = true,	313 int length, bool stripBom = true,

298 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	314 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :

299 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset,	315 super._fromListRangeIterator(

300 length)).iterator(), replacementCodepoint) {	316 (new _ListRange(utf32EncodedBytes, offset, length)).iterator,

	317 replacementCodepoint) {

301 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) {	318 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) {

302 skip();	319 skip();

303 }	320 }

304 }	321 }

305	322

306 int decode() {	323 int decode() {

307 int value = (utf32EncodedBytesIterator.next());	324 utf32EncodedBytesIterator.moveNext();

308 value += (utf32EncodedBytesIterator.next() << 8);	325 int value = utf32EncodedBytesIterator.current;

309 value += (utf32EncodedBytesIterator.next() << 16);	326 utf32EncodedBytesIterator.moveNext();

310 value += (utf32EncodedBytesIterator.next() << 24);	327 value += (utf32EncodedBytesIterator.current << 8);

	328 utf32EncodedBytesIterator.moveNext();

	329 value += (utf32EncodedBytesIterator.current << 16);

	330 utf32EncodedBytesIterator.moveNext();

	331 value += (utf32EncodedBytesIterator.current << 24);

311 return value;	332 return value;

312 }	333 }

313 }	334 }

314	335

315 bool _validCodepoint(int codepoint) {	336 bool _validCodepoint(int codepoint) {

316 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||	337 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||

317 (codepoint > UNICODE_UTF16_RESERVED_HI &&	338 (codepoint > UNICODE_UTF16_RESERVED_HI &&

318 codepoint < UNICODE_VALID_RANGE_MAX);	339 codepoint < UNICODE_VALID_RANGE_MAX);

319 }	340 }

OLD	NEW

« runtime/vm/intrinsifier.h ('K') | « sdk/lib/utf/utf16.dart ('k') | sdk/lib/utf/utf8.dart » ('j') | no next file with comments »