sdk/lib/utf/utf32.dart - Issue 11783009: Big merge from experimental to bleeding edge.

Side by Side Diff: sdk/lib/utf/utf32.dart

Issue 11783009: Big merge from experimental to bleeding edge. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of dart.utf;	5 part of dart.utf;

6	6

7 /**	7 /**

8 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert	8 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert

9 * as much of the input as needed. Determines the byte order from the BOM,	9 * as much of the input as needed. Determines the byte order from the BOM,

10 * or uses big-endian as a default. This method always strips a leading BOM.	10 * or uses big-endian as a default. This method always strips a leading BOM.

(...skipping 164 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
175 utf32EncodedBytes[offset + 2] == 0 && utf32EncodedBytes[offset + 3] == 0;	175 utf32EncodedBytes[offset + 2] == 0 && utf32EncodedBytes[offset + 3] == 0;

176 }	176 }

177	177

178 typedef Utf32BytesDecoder Utf32BytesDecoderProvider();	178 typedef Utf32BytesDecoder Utf32BytesDecoderProvider();

179	179

180 /**	180 /**

181 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type	181 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type

182 * provides an iterator on demand and the iterator will only translate bytes	182 * provides an iterator on demand and the iterator will only translate bytes

183 * as requested by the user of the iterator. (Note: results are not cached.)	183 * as requested by the user of the iterator. (Note: results are not cached.)

184 */	184 */

185 class IterableUtf32Decoder implements Iterable<int> {	185 // TODO(floitsch): Consider removing the extend and switch to implements since

	186 // that's cheaper to allocate.

	187 class IterableUtf32Decoder extends Iterable<int> {

186 final Utf32BytesDecoderProvider codeunitsProvider;	188 final Utf32BytesDecoderProvider codeunitsProvider;

187	189

188 IterableUtf32Decoder._(this.codeunitsProvider);	190 IterableUtf32Decoder._(this.codeunitsProvider);

189	191

190 Utf32BytesDecoder iterator() => codeunitsProvider();	192 Utf32BytesDecoder get iterator => codeunitsProvider();

191 }	193 }

192	194

193 /**	195 /**

194 * Abstract parent class that converts encoded bytes to codepoints.	196 * Abstract parent class that converts encoded bytes to codepoints.

195 */	197 */

196 class Utf32BytesDecoder implements _ListRangeIterator {	198 class Utf32BytesDecoder implements _ListRangeIterator {

197 final _ListRangeIterator utf32EncodedBytesIterator;	199 final _ListRangeIterator utf32EncodedBytesIterator;

198 final int replacementCodepoint;	200 final int replacementCodepoint;

	201 int _current = null;

199	202

200 Utf32BytesDecoder._fromListRangeIterator(	203 Utf32BytesDecoder._fromListRangeIterator(

201 this.utf32EncodedBytesIterator, this.replacementCodepoint);	204 this.utf32EncodedBytesIterator, this.replacementCodepoint);

202	205

203 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [	206 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [

204 int offset = 0, int length,	207 int offset = 0, int length,

205 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	208 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

206 if (length == null) {	209 if (length == null) {

207 length = utf32EncodedBytes.length - offset;	210 length = utf32EncodedBytes.length - offset;

208 }	211 }

209 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {	212 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {

210 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,	213 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,

211 false, replacementCodepoint);	214 false, replacementCodepoint);

212 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) {	215 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) {

213 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,	216 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,

214 false, replacementCodepoint);	217 false, replacementCodepoint);

215 } else {	218 } else {

216 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false,	219 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false,

217 replacementCodepoint);	220 replacementCodepoint);

218 }	221 }

219 }	222 }

220	223

221 List<int> decodeRest() {	224 List<int> decodeRest() {

222 List<int> codeunits = new List<int>(remaining);	225 List<int> codeunits = new List<int>.fixedLength(remaining);

223 int i = 0;	226 int i = 0;

224 while (hasNext) {	227 while (moveNext()) {

225 codeunits[i++] = next();	228 codeunits[i++] = current;

226 }	229 }

227 return codeunits;	230 return codeunits;

228 }	231 }

229	232

230 bool get hasNext => utf32EncodedBytesIterator.hasNext;	233 int get current => _current;

231	234

232 int next() {	235 bool moveNext() {

	236 _current = null;

233 if (utf32EncodedBytesIterator.remaining < 4) {	237 if (utf32EncodedBytesIterator.remaining < 4) {

234 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining);	238 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining);

235 if (replacementCodepoint != null) {	239 if (replacementCodepoint != null) {

236 return replacementCodepoint;	240 _current = replacementCodepoint;

	241 return true;

237 } else {	242 } else {

238 throw new ArgumentError(	243 throw new ArgumentError(

239 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");	244 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");

240 }	245 }

241 } else {	246 } else {

242 int codepoint = decode();	247 int codepoint = decode();

243 if (_validCodepoint(codepoint)) {	248 if (_validCodepoint(codepoint)) {

244 return codepoint;	249 _current = codepoint;

	250 return true;

245 } else if (replacementCodepoint != null) {	251 } else if (replacementCodepoint != null) {

246 return replacementCodepoint;	252 _current = replacementCodepoint;

	253 return true;

247 } else {	254 } else {

248 throw new ArgumentError(	255 throw new ArgumentError(

249 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");	256 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");

250 }	257 }

251 }	258 }

252 }	259 }

253	260

254 int get position => utf32EncodedBytesIterator.position ~/ 4;	261 int get position => utf32EncodedBytesIterator.position ~/ 4;

255	262

256 void backup([int by = 1]) {	263 void backup([int by = 1]) {

(...skipping 10 matching lines...) Expand all Loading...
267 }	274 }

268	275

269 /**	276 /**

270 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes	277 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes

271 * to produce the unicode codepoint.	278 * to produce the unicode codepoint.

272 */	279 */

273 class Utf32beBytesDecoder extends Utf32BytesDecoder {	280 class Utf32beBytesDecoder extends Utf32BytesDecoder {

274 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,	281 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,

275 int length, bool stripBom = true,	282 int length, bool stripBom = true,

276 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	283 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :

277 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset,	284 super._fromListRangeIterator(

278 length)).iterator(), replacementCodepoint) {	285 (new _ListRange(utf32EncodedBytes, offset, length)).iterator,

	286 replacementCodepoint) {

279 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) {	287 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) {

280 skip();	288 skip();

281 }	289 }

282 }	290 }

283	291

284 int decode() {	292 int decode() {

285 int value = utf32EncodedBytesIterator.next();	293 utf32EncodedBytesIterator.moveNext();

286 value = (value << 8) + utf32EncodedBytesIterator.next();	294 int value = utf32EncodedBytesIterator.current;

287 value = (value << 8) + utf32EncodedBytesIterator.next();	295 utf32EncodedBytesIterator.moveNext();

288 value = (value << 8) + utf32EncodedBytesIterator.next();	296 value = (value << 8) + utf32EncodedBytesIterator.current;

	297 utf32EncodedBytesIterator.moveNext();

	298 value = (value << 8) + utf32EncodedBytesIterator.current;

	299 utf32EncodedBytesIterator.moveNext();

	300 value = (value << 8) + utf32EncodedBytesIterator.current;

289 return value;	301 return value;

290 }	302 }

291 }	303 }

292	304

293 /**	305 /**

294 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes	306 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes

295 * to produce the unicode codepoint.	307 * to produce the unicode codepoint.

296 */	308 */

297 class Utf32leBytesDecoder extends Utf32BytesDecoder {	309 class Utf32leBytesDecoder extends Utf32BytesDecoder {

298 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,	310 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,

299 int length, bool stripBom = true,	311 int length, bool stripBom = true,

300 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	312 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :

301 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset,	313 super._fromListRangeIterator(

302 length)).iterator(), replacementCodepoint) {	314 (new _ListRange(utf32EncodedBytes, offset, length)).iterator,

	315 replacementCodepoint) {

303 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) {	316 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) {

304 skip();	317 skip();

305 }	318 }

306 }	319 }

307	320

308 int decode() {	321 int decode() {

309 int value = (utf32EncodedBytesIterator.next());	322 utf32EncodedBytesIterator.moveNext();

310 value += (utf32EncodedBytesIterator.next() << 8);	323 int value = utf32EncodedBytesIterator.current;

311 value += (utf32EncodedBytesIterator.next() << 16);	324 utf32EncodedBytesIterator.moveNext();

312 value += (utf32EncodedBytesIterator.next() << 24);	325 value += (utf32EncodedBytesIterator.current << 8);

	326 utf32EncodedBytesIterator.moveNext();

	327 value += (utf32EncodedBytesIterator.current << 16);

	328 utf32EncodedBytesIterator.moveNext();

	329 value += (utf32EncodedBytesIterator.current << 24);

313 return value;	330 return value;

314 }	331 }

315 }	332 }

316	333

317 bool _validCodepoint(int codepoint) {	334 bool _validCodepoint(int codepoint) {

318 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||	335 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||

319 (codepoint > UNICODE_UTF16_RESERVED_HI &&	336 (codepoint > UNICODE_UTF16_RESERVED_HI &&

320 codepoint < UNICODE_VALID_RANGE_MAX);	337 codepoint < UNICODE_VALID_RANGE_MAX);

321 }	338 }

OLD	NEW

« no previous file with comments | « sdk/lib/utf/utf16.dart ('k') | sdk/lib/utf/utf8.dart » ('j') | no next file with comments »