Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(270)

Side by Side Diff: sdk/lib/utf/utf32.dart

Issue 11783009: Big merge from experimental to bleeding edge. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sdk/lib/utf/utf16.dart ('k') | sdk/lib/utf/utf8.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.utf; 5 part of dart.utf;
6 6
7 /** 7 /**
8 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert 8 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert
9 * as much of the input as needed. Determines the byte order from the BOM, 9 * as much of the input as needed. Determines the byte order from the BOM,
10 * or uses big-endian as a default. This method always strips a leading BOM. 10 * or uses big-endian as a default. This method always strips a leading BOM.
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after
175 utf32EncodedBytes[offset + 2] == 0 && utf32EncodedBytes[offset + 3] == 0; 175 utf32EncodedBytes[offset + 2] == 0 && utf32EncodedBytes[offset + 3] == 0;
176 } 176 }
177 177
178 typedef Utf32BytesDecoder Utf32BytesDecoderProvider(); 178 typedef Utf32BytesDecoder Utf32BytesDecoderProvider();
179 179
180 /** 180 /**
181 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type 181 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type
182 * provides an iterator on demand and the iterator will only translate bytes 182 * provides an iterator on demand and the iterator will only translate bytes
183 * as requested by the user of the iterator. (Note: results are not cached.) 183 * as requested by the user of the iterator. (Note: results are not cached.)
184 */ 184 */
185 class IterableUtf32Decoder implements Iterable<int> { 185 // TODO(floitsch): Consider removing the extend and switch to implements since
186 // that's cheaper to allocate.
187 class IterableUtf32Decoder extends Iterable<int> {
186 final Utf32BytesDecoderProvider codeunitsProvider; 188 final Utf32BytesDecoderProvider codeunitsProvider;
187 189
188 IterableUtf32Decoder._(this.codeunitsProvider); 190 IterableUtf32Decoder._(this.codeunitsProvider);
189 191
190 Utf32BytesDecoder iterator() => codeunitsProvider(); 192 Utf32BytesDecoder get iterator => codeunitsProvider();
191 } 193 }
192 194
193 /** 195 /**
194 * Abstract parent class that converts encoded bytes to codepoints. 196 * Abstract parent class that converts encoded bytes to codepoints.
195 */ 197 */
196 class Utf32BytesDecoder implements _ListRangeIterator { 198 class Utf32BytesDecoder implements _ListRangeIterator {
197 final _ListRangeIterator utf32EncodedBytesIterator; 199 final _ListRangeIterator utf32EncodedBytesIterator;
198 final int replacementCodepoint; 200 final int replacementCodepoint;
201 int _current = null;
199 202
200 Utf32BytesDecoder._fromListRangeIterator( 203 Utf32BytesDecoder._fromListRangeIterator(
201 this.utf32EncodedBytesIterator, this.replacementCodepoint); 204 this.utf32EncodedBytesIterator, this.replacementCodepoint);
202 205
203 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ 206 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [
204 int offset = 0, int length, 207 int offset = 0, int length,
205 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 208 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
206 if (length == null) { 209 if (length == null) {
207 length = utf32EncodedBytes.length - offset; 210 length = utf32EncodedBytes.length - offset;
208 } 211 }
209 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { 212 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {
210 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, 213 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,
211 false, replacementCodepoint); 214 false, replacementCodepoint);
212 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { 215 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) {
213 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, 216 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,
214 false, replacementCodepoint); 217 false, replacementCodepoint);
215 } else { 218 } else {
216 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false, 219 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false,
217 replacementCodepoint); 220 replacementCodepoint);
218 } 221 }
219 } 222 }
220 223
221 List<int> decodeRest() { 224 List<int> decodeRest() {
222 List<int> codeunits = new List<int>(remaining); 225 List<int> codeunits = new List<int>.fixedLength(remaining);
223 int i = 0; 226 int i = 0;
224 while (hasNext) { 227 while (moveNext()) {
225 codeunits[i++] = next(); 228 codeunits[i++] = current;
226 } 229 }
227 return codeunits; 230 return codeunits;
228 } 231 }
229 232
230 bool get hasNext => utf32EncodedBytesIterator.hasNext; 233 int get current => _current;
231 234
232 int next() { 235 bool moveNext() {
236 _current = null;
233 if (utf32EncodedBytesIterator.remaining < 4) { 237 if (utf32EncodedBytesIterator.remaining < 4) {
234 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); 238 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining);
235 if (replacementCodepoint != null) { 239 if (replacementCodepoint != null) {
236 return replacementCodepoint; 240 _current = replacementCodepoint;
241 return true;
237 } else { 242 } else {
238 throw new ArgumentError( 243 throw new ArgumentError(
239 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); 244 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");
240 } 245 }
241 } else { 246 } else {
242 int codepoint = decode(); 247 int codepoint = decode();
243 if (_validCodepoint(codepoint)) { 248 if (_validCodepoint(codepoint)) {
244 return codepoint; 249 _current = codepoint;
250 return true;
245 } else if (replacementCodepoint != null) { 251 } else if (replacementCodepoint != null) {
246 return replacementCodepoint; 252 _current = replacementCodepoint;
253 return true;
247 } else { 254 } else {
248 throw new ArgumentError( 255 throw new ArgumentError(
249 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); 256 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");
250 } 257 }
251 } 258 }
252 } 259 }
253 260
254 int get position => utf32EncodedBytesIterator.position ~/ 4; 261 int get position => utf32EncodedBytesIterator.position ~/ 4;
255 262
256 void backup([int by = 1]) { 263 void backup([int by = 1]) {
(...skipping 10 matching lines...) Expand all
267 } 274 }
268 275
269 /** 276 /**
270 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes 277 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes
271 * to produce the unicode codepoint. 278 * to produce the unicode codepoint.
272 */ 279 */
273 class Utf32beBytesDecoder extends Utf32BytesDecoder { 280 class Utf32beBytesDecoder extends Utf32BytesDecoder {
274 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, 281 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
275 int length, bool stripBom = true, 282 int length, bool stripBom = true,
276 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 283 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
277 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset, 284 super._fromListRangeIterator(
278 length)).iterator(), replacementCodepoint) { 285 (new _ListRange(utf32EncodedBytes, offset, length)).iterator,
286 replacementCodepoint) {
279 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { 287 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) {
280 skip(); 288 skip();
281 } 289 }
282 } 290 }
283 291
284 int decode() { 292 int decode() {
285 int value = utf32EncodedBytesIterator.next(); 293 utf32EncodedBytesIterator.moveNext();
286 value = (value << 8) + utf32EncodedBytesIterator.next(); 294 int value = utf32EncodedBytesIterator.current;
287 value = (value << 8) + utf32EncodedBytesIterator.next(); 295 utf32EncodedBytesIterator.moveNext();
288 value = (value << 8) + utf32EncodedBytesIterator.next(); 296 value = (value << 8) + utf32EncodedBytesIterator.current;
297 utf32EncodedBytesIterator.moveNext();
298 value = (value << 8) + utf32EncodedBytesIterator.current;
299 utf32EncodedBytesIterator.moveNext();
300 value = (value << 8) + utf32EncodedBytesIterator.current;
289 return value; 301 return value;
290 } 302 }
291 } 303 }
292 304
293 /** 305 /**
294 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes 306 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes
295 * to produce the unicode codepoint. 307 * to produce the unicode codepoint.
296 */ 308 */
297 class Utf32leBytesDecoder extends Utf32BytesDecoder { 309 class Utf32leBytesDecoder extends Utf32BytesDecoder {
298 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, 310 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
299 int length, bool stripBom = true, 311 int length, bool stripBom = true,
300 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 312 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
301 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset, 313 super._fromListRangeIterator(
302 length)).iterator(), replacementCodepoint) { 314 (new _ListRange(utf32EncodedBytes, offset, length)).iterator,
315 replacementCodepoint) {
303 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { 316 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) {
304 skip(); 317 skip();
305 } 318 }
306 } 319 }
307 320
308 int decode() { 321 int decode() {
309 int value = (utf32EncodedBytesIterator.next()); 322 utf32EncodedBytesIterator.moveNext();
310 value += (utf32EncodedBytesIterator.next() << 8); 323 int value = utf32EncodedBytesIterator.current;
311 value += (utf32EncodedBytesIterator.next() << 16); 324 utf32EncodedBytesIterator.moveNext();
312 value += (utf32EncodedBytesIterator.next() << 24); 325 value += (utf32EncodedBytesIterator.current << 8);
326 utf32EncodedBytesIterator.moveNext();
327 value += (utf32EncodedBytesIterator.current << 16);
328 utf32EncodedBytesIterator.moveNext();
329 value += (utf32EncodedBytesIterator.current << 24);
313 return value; 330 return value;
314 } 331 }
315 } 332 }
316 333
317 bool _validCodepoint(int codepoint) { 334 bool _validCodepoint(int codepoint) {
318 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || 335 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||
319 (codepoint > UNICODE_UTF16_RESERVED_HI && 336 (codepoint > UNICODE_UTF16_RESERVED_HI &&
320 codepoint < UNICODE_VALID_RANGE_MAX); 337 codepoint < UNICODE_VALID_RANGE_MAX);
321 } 338 }
OLDNEW
« no previous file with comments | « sdk/lib/utf/utf16.dart ('k') | sdk/lib/utf/utf8.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698