Index: utf/lib/src/utf/utf32.dart |
diff --git a/utf/lib/src/utf/utf32.dart b/utf/lib/src/utf/utf32.dart |
deleted file mode 100644 |
index e51009d6bbac57f4afe2bf7885bcf66121918913..0000000000000000000000000000000000000000 |
--- a/utf/lib/src/utf/utf32.dart |
+++ /dev/null |
@@ -1,343 +0,0 @@ |
-// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
-// for details. All rights reserved. Use of this source code is governed by a |
-// BSD-style license that can be found in the LICENSE file. |
- |
-part of utf; |
- |
-/** |
- * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert |
- * as much of the input as needed. Determines the byte order from the BOM, |
- * or uses big-endian as a default. This method always strips a leading BOM. |
- * Set the replacementCharacter to null to throw an ArgumentError |
- * rather than replace the bad value. |
- */ |
-IterableUtf32Decoder decodeUtf32AsIterable(List<int> bytes, [ |
- int offset = 0, int length, |
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
- return new IterableUtf32Decoder._( |
- () => new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint)); |
-} |
- |
-/** |
- * Decodes the UTF-32BE bytes as an iterable. Thus, the consumer can only convert |
- * as much of the input as needed. This method strips a leading BOM by default, |
- * but can be overridden by setting the optional parameter [stripBom] to false. |
- * Set the replacementCharacter to null to throw an ArgumentError |
- * rather than replace the bad value. |
- */ |
-IterableUtf32Decoder decodeUtf32beAsIterable(List<int> bytes, [ |
- int offset = 0, int length, bool stripBom = true, |
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
- return new IterableUtf32Decoder._( |
- () => new Utf32beBytesDecoder(bytes, offset, length, stripBom, |
- replacementCodepoint)); |
-} |
- |
-/** |
- * Decodes the UTF-32LE bytes as an iterable. Thus, the consumer can only convert |
- * as much of the input as needed. This method strips a leading BOM by default, |
- * but can be overridden by setting the optional parameter [stripBom] to false. |
- * Set the replacementCharacter to null to throw an ArgumentError |
- * rather than replace the bad value. |
- */ |
-IterableUtf32Decoder decodeUtf32leAsIterable(List<int> bytes, [ |
- int offset = 0, int length, bool stripBom = true, |
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
- return new IterableUtf32Decoder._( |
- () => new Utf32leBytesDecoder(bytes, offset, length, stripBom, |
- replacementCodepoint)); |
-} |
- |
-/** |
- * Produce a String from a sequence of UTF-32 encoded bytes. The parameters |
- * allow an offset into a list of bytes (as int), limiting the length of the |
- * values be decoded and the ability of override the default Unicode |
- * replacement character. Set the replacementCharacter to null to throw an |
- * ArgumentError rather than replace the bad value. |
- */ |
-String decodeUtf32(List<int> bytes, [int offset = 0, int length, |
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
- return new String.fromCharCodes((new Utf32BytesDecoder(bytes, offset, length, |
- replacementCodepoint)).decodeRest()); |
-} |
-/** |
- * Produce a String from a sequence of UTF-32BE encoded bytes. The parameters |
- * allow an offset into a list of bytes (as int), limiting the length of the |
- * values be decoded and the ability of override the default Unicode |
- * replacement character. Set the replacementCharacter to null to throw an |
- * ArgumentError rather than replace the bad value. |
- */ |
-String decodeUtf32be( |
- List<int> bytes, [int offset = 0, int length, bool stripBom = true, |
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) => |
- new String.fromCharCodes((new Utf32beBytesDecoder(bytes, offset, length, |
- stripBom, replacementCodepoint)).decodeRest()); |
- |
-/** |
- * Produce a String from a sequence of UTF-32LE encoded bytes. The parameters |
- * allow an offset into a list of bytes (as int), limiting the length of the |
- * values be decoded and the ability of override the default Unicode |
- * replacement character. Set the replacementCharacter to null to throw an |
- * ArgumentError rather than replace the bad value. |
- */ |
-String decodeUtf32le( |
- List<int> bytes, [int offset = 0, int length, bool stripBom = true, |
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) => |
- new String.fromCharCodes((new Utf32leBytesDecoder(bytes, offset, length, |
- stripBom, replacementCodepoint)).decodeRest()); |
- |
-/** |
- * Produce a list of UTF-32 encoded bytes. This method prefixes the resulting |
- * bytes with a big-endian byte-order-marker. |
- */ |
-List<int> encodeUtf32(String str) => |
- encodeUtf32be(str, true); |
- |
-/** |
- * Produce a list of UTF-32BE encoded bytes. By default, this method produces |
- * UTF-32BE bytes with no BOM. |
- */ |
-List<int> encodeUtf32be(String str, [bool writeBOM = false]) { |
- List<int> utf32CodeUnits = stringToCodepoints(str); |
- List<int> encoding = new List<int>(4 * utf32CodeUnits.length + |
- (writeBOM ? 4 : 0)); |
- int i = 0; |
- if (writeBOM) { |
- encoding[i++] = 0; |
- encoding[i++] = 0; |
- encoding[i++] = UNICODE_UTF_BOM_HI; |
- encoding[i++] = UNICODE_UTF_BOM_LO; |
- } |
- for (int unit in utf32CodeUnits) { |
- encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK; |
- encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK; |
- encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK; |
- encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; |
- } |
- return encoding; |
-} |
- |
-/** |
- * Produce a list of UTF-32LE encoded bytes. By default, this method produces |
- * UTF-32BE bytes with no BOM. |
- */ |
-List<int> encodeUtf32le(String str, [bool writeBOM = false]) { |
- List<int> utf32CodeUnits = stringToCodepoints(str); |
- List<int> encoding = new List<int>(4 * utf32CodeUnits.length + |
- (writeBOM ? 4 : 0)); |
- int i = 0; |
- if (writeBOM) { |
- encoding[i++] = UNICODE_UTF_BOM_LO; |
- encoding[i++] = UNICODE_UTF_BOM_HI; |
- encoding[i++] = 0; |
- encoding[i++] = 0; |
- } |
- for (int unit in utf32CodeUnits) { |
- encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; |
- encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK; |
- encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK; |
- encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK; |
- } |
- return encoding; |
-} |
- |
-/** |
- * Identifies whether a List of bytes starts (based on offset) with a |
- * byte-order marker (BOM). |
- */ |
-bool hasUtf32Bom( |
- List<int> utf32EncodedBytes, [int offset = 0, int length]) { |
- return hasUtf32beBom(utf32EncodedBytes, offset, length) || |
- hasUtf32leBom(utf32EncodedBytes, offset, length); |
-} |
- |
-/** |
- * Identifies whether a List of bytes starts (based on offset) with a |
- * big-endian byte-order marker (BOM). |
- */ |
-bool hasUtf32beBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) { |
- int end = length != null ? offset + length : utf32EncodedBytes.length; |
- return (offset + 4) <= end && |
- utf32EncodedBytes[offset] == 0 && utf32EncodedBytes[offset + 1] == 0 && |
- utf32EncodedBytes[offset + 2] == UNICODE_UTF_BOM_HI && |
- utf32EncodedBytes[offset + 3] == UNICODE_UTF_BOM_LO; |
-} |
- |
-/** |
- * Identifies whether a List of bytes starts (based on offset) with a |
- * little-endian byte-order marker (BOM). |
- */ |
-bool hasUtf32leBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) { |
- int end = length != null ? offset + length : utf32EncodedBytes.length; |
- return (offset + 4) <= end && |
- utf32EncodedBytes[offset] == UNICODE_UTF_BOM_LO && |
- utf32EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI && |
- utf32EncodedBytes[offset + 2] == 0 && utf32EncodedBytes[offset + 3] == 0; |
-} |
- |
-typedef Utf32BytesDecoder Utf32BytesDecoderProvider(); |
- |
-/** |
- * Return type of [decodeUtf32AsIterable] and variants. The Iterable type |
- * provides an iterator on demand and the iterator will only translate bytes |
- * as requested by the user of the iterator. (Note: results are not cached.) |
- */ |
-// TODO(floitsch): Consider removing the extend and switch to implements since |
-// that's cheaper to allocate. |
-class IterableUtf32Decoder extends IterableBase<int> { |
- final Utf32BytesDecoderProvider codeunitsProvider; |
- |
- IterableUtf32Decoder._(this.codeunitsProvider); |
- |
- Utf32BytesDecoder get iterator => codeunitsProvider(); |
-} |
- |
-/** |
- * Abstrace parent class converts encoded bytes to codepoints. |
- */ |
-abstract class Utf32BytesDecoder implements ListRangeIterator { |
- // TODO(kevmoo): should this field be private? |
- final ListRangeIterator utf32EncodedBytesIterator; |
- final int replacementCodepoint; |
- int _current = null; |
- |
- Utf32BytesDecoder._fromListRangeIterator( |
- this.utf32EncodedBytesIterator, this.replacementCodepoint); |
- |
- factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ |
- int offset = 0, int length, |
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
- if (length == null) { |
- length = utf32EncodedBytes.length - offset; |
- } |
- if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
- return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
- false, replacementCodepoint); |
- } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
- return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
- false, replacementCodepoint); |
- } else { |
- return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false, |
- replacementCodepoint); |
- } |
- } |
- |
- List<int> decodeRest() { |
- List<int> codeunits = new List<int>(remaining); |
- int i = 0; |
- while (moveNext()) { |
- codeunits[i++] = current; |
- } |
- return codeunits; |
- } |
- |
- int get current => _current; |
- |
- bool moveNext() { |
- _current = null; |
- int remaining = utf32EncodedBytesIterator.remaining; |
- if (remaining == 0) { |
- _current = null; |
- return false; |
- } |
- if (remaining < 4) { |
- utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); |
- if (replacementCodepoint != null) { |
- _current = replacementCodepoint; |
- return true; |
- } else { |
- throw new ArgumentError( |
- "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); |
- } |
- } |
- int codepoint = decode(); |
- if (_validCodepoint(codepoint)) { |
- _current = codepoint; |
- return true; |
- } else if (replacementCodepoint != null) { |
- _current = replacementCodepoint; |
- return true; |
- } else { |
- throw new ArgumentError( |
- "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); |
- } |
- } |
- |
- int get position => utf32EncodedBytesIterator.position ~/ 4; |
- |
- void backup([int by = 1]) { |
- utf32EncodedBytesIterator.backup(4 * by); |
- } |
- |
- int get remaining => (utf32EncodedBytesIterator.remaining + 3) ~/ 4; |
- |
- void skip([int count = 1]) { |
- utf32EncodedBytesIterator.skip(4 * count); |
- } |
- |
- int decode(); |
-} |
- |
-/** |
- * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes |
- * to produce the unicode codepoint. |
- */ |
-class Utf32beBytesDecoder extends Utf32BytesDecoder { |
- Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, |
- int length, bool stripBom = true, |
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
- super._fromListRangeIterator( |
- (new ListRange(utf32EncodedBytes, offset, length)).iterator, |
- replacementCodepoint) { |
- if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
- skip(); |
- } |
- } |
- |
- int decode() { |
- utf32EncodedBytesIterator.moveNext(); |
- int value = utf32EncodedBytesIterator.current; |
- utf32EncodedBytesIterator.moveNext(); |
- value = (value << 8) + utf32EncodedBytesIterator.current; |
- utf32EncodedBytesIterator.moveNext(); |
- value = (value << 8) + utf32EncodedBytesIterator.current; |
- utf32EncodedBytesIterator.moveNext(); |
- value = (value << 8) + utf32EncodedBytesIterator.current; |
- return value; |
- } |
-} |
- |
-/** |
- * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes |
- * to produce the unicode codepoint. |
- */ |
-class Utf32leBytesDecoder extends Utf32BytesDecoder { |
- Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, |
- int length, bool stripBom = true, |
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
- super._fromListRangeIterator( |
- (new ListRange(utf32EncodedBytes, offset, length)).iterator, |
- replacementCodepoint) { |
- if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
- skip(); |
- } |
- } |
- |
- int decode() { |
- utf32EncodedBytesIterator.moveNext(); |
- int value = utf32EncodedBytesIterator.current; |
- utf32EncodedBytesIterator.moveNext(); |
- value += (utf32EncodedBytesIterator.current << 8); |
- utf32EncodedBytesIterator.moveNext(); |
- value += (utf32EncodedBytesIterator.current << 16); |
- utf32EncodedBytesIterator.moveNext(); |
- value += (utf32EncodedBytesIterator.current << 24); |
- return value; |
- } |
-} |
- |
-bool _validCodepoint(int codepoint) { |
- return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || |
- (codepoint > UNICODE_UTF16_RESERVED_HI && |
- codepoint < UNICODE_VALID_RANGE_MAX); |
-} |