Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(183)

Side by Side Diff: sdk/lib/utf/utf32.dart

Issue 11410086: Use iterator, moveNext(), current. (Closed) Base URL: https://dart.googlecode.com/svn/experimental/lib_v2/dart
Patch Set: Address comments. Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 /** 5 /**
6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert 6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert
7 * as much of the input as needed. Determines the byte order from the BOM, 7 * as much of the input as needed. Determines the byte order from the BOM,
8 * or uses big-endian as a default. This method always strips a leading BOM. 8 * or uses big-endian as a default. This method always strips a leading BOM.
9 * Set the replacementCharacter to null to throw an ArgumentError 9 * Set the replacementCharacter to null to throw an ArgumentError
10 * rather than replace the bad value. 10 * rather than replace the bad value.
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 /** 178 /**
179 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type 179 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type
180 * provides an iterator on demand and the iterator will only translate bytes 180 * provides an iterator on demand and the iterator will only translate bytes
181 * as requested by the user of the iterator. (Note: results are not cached.) 181 * as requested by the user of the iterator. (Note: results are not cached.)
182 */ 182 */
183 class IterableUtf32Decoder extends Iterable<int> { 183 class IterableUtf32Decoder extends Iterable<int> {
184 final Utf32BytesDecoderProvider codeunitsProvider; 184 final Utf32BytesDecoderProvider codeunitsProvider;
185 185
186 IterableUtf32Decoder._(this.codeunitsProvider); 186 IterableUtf32Decoder._(this.codeunitsProvider);
187 187
188 Utf32BytesDecoder iterator() => codeunitsProvider(); 188 Utf32BytesDecoder get iterator => codeunitsProvider();
189 } 189 }
190 190
191 /** 191 /**
192 * Abstract parent class that converts encoded bytes to codepoints. 192 * Abstract parent class that converts encoded bytes to codepoints.
193 */ 193 */
194 class Utf32BytesDecoder implements _ListRangeIterator { 194 class Utf32BytesDecoder implements _ListRangeIterator {
195 final _ListRangeIterator utf32EncodedBytesIterator; 195 final _ListRangeIterator utf32EncodedBytesIterator;
196 final int replacementCodepoint; 196 final int replacementCodepoint;
197 int _current = -1;
197 198
198 Utf32BytesDecoder._fromListRangeIterator( 199 Utf32BytesDecoder._fromListRangeIterator(
199 this.utf32EncodedBytesIterator, this.replacementCodepoint); 200 this.utf32EncodedBytesIterator, this.replacementCodepoint);
200 201
201 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ 202 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [
202 int offset = 0, int length, 203 int offset = 0, int length,
203 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 204 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
204 if (length == null) { 205 if (length == null) {
205 length = utf32EncodedBytes.length - offset; 206 length = utf32EncodedBytes.length - offset;
206 } 207 }
207 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { 208 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {
208 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, 209 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,
209 false, replacementCodepoint); 210 false, replacementCodepoint);
210 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { 211 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) {
211 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, 212 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,
212 false, replacementCodepoint); 213 false, replacementCodepoint);
213 } else { 214 } else {
214 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false, 215 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false,
215 replacementCodepoint); 216 replacementCodepoint);
216 } 217 }
217 } 218 }
218 219
219 List<int> decodeRest() { 220 List<int> decodeRest() {
220 List<int> codeunits = new List<int>(remaining); 221 List<int> codeunits = new List<int>(remaining);
221 int i = 0; 222 int i = 0;
222 while (hasNext) { 223 while (moveNext()) {
223 codeunits[i++] = next(); 224 codeunits[i++] = current;
224 } 225 }
225 return codeunits; 226 return codeunits;
226 } 227 }
227 228
228 bool get hasNext => utf32EncodedBytesIterator.hasNext; 229 int get current {
230 if (_current == -1) {
231 // TODO(floitsch): bad error message.
232 throw new StateError("No more elements");
233 }
234 return _current;
235 }
229 236
230 int next() { 237 bool moveNext() {
238 _current = -1;
231 if (utf32EncodedBytesIterator.remaining < 4) { 239 if (utf32EncodedBytesIterator.remaining < 4) {
232 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); 240 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining);
233 if (replacementCodepoint != null) { 241 if (replacementCodepoint != null) {
234 return replacementCodepoint; 242 _current = replacementCodepoint;
243 return true;
235 } else { 244 } else {
236 throw new ArgumentError( 245 throw new ArgumentError(
237 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); 246 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");
238 } 247 }
239 } else { 248 } else {
240 int codepoint = decode(); 249 int codepoint = decode();
241 if (_validCodepoint(codepoint)) { 250 if (_validCodepoint(codepoint)) {
242 return codepoint; 251 _current = codepoint;
252 return true;
243 } else if (replacementCodepoint != null) { 253 } else if (replacementCodepoint != null) {
244 return replacementCodepoint; 254 _current = replacementCodepoint;
255 return true;
245 } else { 256 } else {
246 throw new ArgumentError( 257 throw new ArgumentError(
247 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); 258 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");
248 } 259 }
249 } 260 }
250 } 261 }
251 262
252 int get position => utf32EncodedBytesIterator.position ~/ 4; 263 int get position => utf32EncodedBytesIterator.position ~/ 4;
253 264
254 void backup([int by = 1]) { 265 void backup([int by = 1]) {
(...skipping 10 matching lines...) Expand all
265 } 276 }
266 277
267 /** 278 /**
268 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes 279 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes
269 * to produce the unicode codepoint. 280 * to produce the unicode codepoint.
270 */ 281 */
271 class Utf32beBytesDecoder extends Utf32BytesDecoder { 282 class Utf32beBytesDecoder extends Utf32BytesDecoder {
272 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, 283 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
273 int length, bool stripBom = true, 284 int length, bool stripBom = true,
274 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 285 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
275 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset, 286 super._fromListRangeIterator(
276 length)).iterator(), replacementCodepoint) { 287 (new _ListRange(utf32EncodedBytes, offset, length)).iterator,
288 replacementCodepoint) {
277 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { 289 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) {
278 skip(); 290 skip();
279 } 291 }
280 } 292 }
281 293
282 int decode() { 294 int decode() {
283 int value = utf32EncodedBytesIterator.next(); 295 utf32EncodedBytesIterator.moveNext();
284 value = (value << 8) + utf32EncodedBytesIterator.next(); 296 int value = utf32EncodedBytesIterator.current;
285 value = (value << 8) + utf32EncodedBytesIterator.next(); 297 utf32EncodedBytesIterator.moveNext();
286 value = (value << 8) + utf32EncodedBytesIterator.next(); 298 value = (value << 8) + utf32EncodedBytesIterator.current;
299 utf32EncodedBytesIterator.moveNext();
300 value = (value << 8) + utf32EncodedBytesIterator.current;
301 utf32EncodedBytesIterator.moveNext();
302 value = (value << 8) + utf32EncodedBytesIterator.current;
287 return value; 303 return value;
288 } 304 }
289 } 305 }
290 306
291 /** 307 /**
292 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes 308 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes
293 * to produce the unicode codepoint. 309 * to produce the unicode codepoint.
294 */ 310 */
295 class Utf32leBytesDecoder extends Utf32BytesDecoder { 311 class Utf32leBytesDecoder extends Utf32BytesDecoder {
296 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, 312 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
297 int length, bool stripBom = true, 313 int length, bool stripBom = true,
298 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 314 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
299 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset, 315 super._fromListRangeIterator(
300 length)).iterator(), replacementCodepoint) { 316 (new _ListRange(utf32EncodedBytes, offset, length)).iterator,
317 replacementCodepoint) {
301 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { 318 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) {
302 skip(); 319 skip();
303 } 320 }
304 } 321 }
305 322
306 int decode() { 323 int decode() {
307 int value = (utf32EncodedBytesIterator.next()); 324 utf32EncodedBytesIterator.moveNext();
308 value += (utf32EncodedBytesIterator.next() << 8); 325 int value = utf32EncodedBytesIterator.current;
309 value += (utf32EncodedBytesIterator.next() << 16); 326 utf32EncodedBytesIterator.moveNext();
310 value += (utf32EncodedBytesIterator.next() << 24); 327 value += (utf32EncodedBytesIterator.current << 8);
328 utf32EncodedBytesIterator.moveNext();
329 value += (utf32EncodedBytesIterator.current << 16);
330 utf32EncodedBytesIterator.moveNext();
331 value += (utf32EncodedBytesIterator.current << 24);
311 return value; 332 return value;
312 } 333 }
313 } 334 }
314 335
315 bool _validCodepoint(int codepoint) { 336 bool _validCodepoint(int codepoint) {
316 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || 337 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||
317 (codepoint > UNICODE_UTF16_RESERVED_HI && 338 (codepoint > UNICODE_UTF16_RESERVED_HI &&
318 codepoint < UNICODE_VALID_RANGE_MAX); 339 codepoint < UNICODE_VALID_RANGE_MAX);
319 } 340 }
OLDNEW
« runtime/vm/intrinsifier.h ('K') | « sdk/lib/utf/utf16.dart ('k') | sdk/lib/utf/utf8.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698