| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of utf; | 5 library utf.utf32; |
| 6 |
| 7 import "dart:collection"; |
| 8 |
| 9 import 'constants.dart'; |
| 10 import 'list_range.dart'; |
| 11 import 'shared.dart'; |
| 6 | 12 |
| 7 /** | 13 /** |
| 8 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can convert only | 14 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can convert only |
| 9 * as much of the input as needed. Determines the byte order from the BOM, | 15 * as much of the input as needed. Determines the byte order from the BOM, |
| 10 * or uses big-endian as a default. This method always strips a leading BOM. | 16 * or uses big-endian as a default. This method always strips a leading BOM. |
| 11 * Set [replacementCodepoint] to null to throw an ArgumentError | 17 * Set [replacementCodepoint] to null to throw an ArgumentError |
| 12 * rather than replace the bad value. | 18 * rather than replace the bad value. |
| 13 */ | 19 */ |
| 14 IterableUtf32Decoder decodeUtf32AsIterable(List<int> bytes, [ | 20 IterableUtf32Decoder decodeUtf32AsIterable(List<int> bytes, |
| 15 int offset = 0, int length, | 21 [int offset = 0, |
| 22 int length, |
| 16 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 23 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 17 return new IterableUtf32Decoder._( | 24 return new IterableUtf32Decoder._( |
| 18 () => new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint)); | 25 () => new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint)); |
| 19 } | 26 } |
| 20 | 27 |
| 21 /** | 28 /** |
| 22 * Decodes the UTF-32BE bytes as an iterable. Thus, the consumer can convert only | 29 * Decodes the UTF-32BE bytes as an iterable. Thus, the consumer can convert only |
| 23 * as much of the input as needed. This method strips a leading BOM by default, | 30 * as much of the input as needed. This method strips a leading BOM by default, |
| 24 * but this can be overridden by setting the optional parameter [stripBom] to false. | 31 * but this can be overridden by setting the optional parameter [stripBom] to false. |
| 25 * Set [replacementCodepoint] to null to throw an ArgumentError | 32 * Set [replacementCodepoint] to null to throw an ArgumentError |
| 26 * rather than replace the bad value. | 33 * rather than replace the bad value. |
| 27 */ | 34 */ |
| 28 IterableUtf32Decoder decodeUtf32beAsIterable(List<int> bytes, [ | 35 IterableUtf32Decoder decodeUtf32beAsIterable(List<int> bytes, |
| 29 int offset = 0, int length, bool stripBom = true, | 36 [int offset = 0, |
| 37 int length, |
| 38 bool stripBom = true, |
| 30 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 39 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 31 return new IterableUtf32Decoder._( | 40 return new IterableUtf32Decoder._(() => new Utf32beBytesDecoder( |
| 32 () => new Utf32beBytesDecoder(bytes, offset, length, stripBom, | 41 bytes, offset, length, stripBom, replacementCodepoint)); |
| 33 replacementCodepoint)); | |
| 34 } | 42 } |
| 35 | 43 |
| 36 /** | 44 /** |
| 37 * Decodes the UTF-32LE bytes as an iterable. Thus, the consumer can convert only | 45 * Decodes the UTF-32LE bytes as an iterable. Thus, the consumer can convert only |
| 38 * as much of the input as needed. This method strips a leading BOM by default, | 46 * as much of the input as needed. This method strips a leading BOM by default, |
| 39 * but this can be overridden by setting the optional parameter [stripBom] to false. | 47 * but this can be overridden by setting the optional parameter [stripBom] to false. |
| 40 * Set [replacementCodepoint] to null to throw an ArgumentError | 48 * Set [replacementCodepoint] to null to throw an ArgumentError |
| 41 * rather than replace the bad value. | 49 * rather than replace the bad value. |
| 42 */ | 50 */ |
| 43 IterableUtf32Decoder decodeUtf32leAsIterable(List<int> bytes, [ | 51 IterableUtf32Decoder decodeUtf32leAsIterable(List<int> bytes, |
| 44 int offset = 0, int length, bool stripBom = true, | 52 [int offset = 0, |
| 53 int length, |
| 54 bool stripBom = true, |
| 45 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 55 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 46 return new IterableUtf32Decoder._( | 56 return new IterableUtf32Decoder._(() => new Utf32leBytesDecoder( |
| 47 () => new Utf32leBytesDecoder(bytes, offset, length, stripBom, | 57 bytes, offset, length, stripBom, replacementCodepoint)); |
| 48 replacementCodepoint)); | |
| 49 } | 58 } |
| 50 | 59 |
| 51 /** | 60 /** |
| 52 * Produce a String from a sequence of UTF-32 encoded bytes. The parameters | 61 * Produce a String from a sequence of UTF-32 encoded bytes. The parameters |
| 53 * allow an offset into a list of bytes (as int), limiting the length of the | 62 * allow an offset into a list of bytes (as int), limiting the length of the |
| 54 * values to be decoded, and overriding the default Unicode | 63 * values to be decoded, and overriding the default Unicode |
| 55 * replacement character. Set [replacementCodepoint] to null to throw an | 64 * replacement character. Set [replacementCodepoint] to null to throw an |
| 56 * ArgumentError rather than replace the bad value. | 65 * ArgumentError rather than replace the bad value. |
| 57 */ | 66 */ |
| 58 String decodeUtf32(List<int> bytes, [int offset = 0, int length, | 67 String decodeUtf32(List<int> bytes, |
| 68 [int offset = 0, |
| 69 int length, |
| 59 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 70 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 60 return new String.fromCharCodes((new Utf32BytesDecoder(bytes, offset, length, | 71 return new String.fromCharCodes( |
| 61 replacementCodepoint)).decodeRest()); | 72 (new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint)) |
| 73 .decodeRest()); |
| 62 } | 74 } |
| 75 |
| 63 /** | 76 /** |
| 64 * Produce a String from a sequence of UTF-32BE encoded bytes. The parameters | 77 * Produce a String from a sequence of UTF-32BE encoded bytes. The parameters |
| 65 * allow an offset into a list of bytes (as int), limiting the length of the | 78 * allow an offset into a list of bytes (as int), limiting the length of the |
| 66 * values to be decoded, and overriding the default Unicode | 79 * values to be decoded, and overriding the default Unicode |
| 67 * replacement character. Set [replacementCodepoint] to null to throw an | 80 * replacement character. Set [replacementCodepoint] to null to throw an |
| 68 * ArgumentError rather than replace the bad value. | 81 * ArgumentError rather than replace the bad value. |
| 69 */ | 82 */ |
| 70 String decodeUtf32be( | 83 String decodeUtf32be(List<int> bytes, |
| 71 List<int> bytes, [int offset = 0, int length, bool stripBom = true, | 84 [int offset = 0, |
| 72 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) => | 85 int length, |
| 73 new String.fromCharCodes((new Utf32beBytesDecoder(bytes, offset, length, | 86 bool stripBom = true, |
| 74 stripBom, replacementCodepoint)).decodeRest()); | 87 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) => |
| 88 new String.fromCharCodes((new Utf32beBytesDecoder( |
| 89 bytes, offset, length, stripBom, replacementCodepoint)) |
| 90 .decodeRest()); |
| 75 | 91 |
| 76 /** | 92 /** |
| 77 * Produce a String from a sequence of UTF-32LE encoded bytes. The parameters | 93 * Produce a String from a sequence of UTF-32LE encoded bytes. The parameters |
| 78 * allow an offset into a list of bytes (as int), limiting the length of the | 94 * allow an offset into a list of bytes (as int), limiting the length of the |
| 78 * values to be decoded, and overriding the default Unicode | 95 * values to be decoded, and overriding the default Unicode |
| 79 * replacement character. Set [replacementCodepoint] to null to throw an | 96 * replacement character. Set [replacementCodepoint] to null to throw an |
| 81 * ArgumentError rather than replace the bad value. | 97 * ArgumentError rather than replace the bad value. |
| 82 */ | 98 */ |
| 83 String decodeUtf32le( | 99 String decodeUtf32le(List<int> bytes, |
| 84 List<int> bytes, [int offset = 0, int length, bool stripBom = true, | 100 [int offset = 0, |
| 85 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) => | 101 int length, |
| 86 new String.fromCharCodes((new Utf32leBytesDecoder(bytes, offset, length, | 102 bool stripBom = true, |
| 87 stripBom, replacementCodepoint)).decodeRest()); | 103 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) => |
| 104 new String.fromCharCodes((new Utf32leBytesDecoder( |
| 105 bytes, offset, length, stripBom, replacementCodepoint)) |
| 106 .decodeRest()); |
| 88 | 107 |
| 89 /** | 108 /** |
| 90 * Produce a list of UTF-32 encoded bytes. This method prefixes the resulting | 109 * Produce a list of UTF-32 encoded bytes. This method prefixes the resulting |
| 91 * bytes with a big-endian byte-order-marker. | 110 * bytes with a big-endian byte-order-marker. |
| 92 */ | 111 */ |
| 93 List<int> encodeUtf32(String str) => | 112 List<int> encodeUtf32(String str) => encodeUtf32be(str, true); |
| 94 encodeUtf32be(str, true); | |
| 95 | 113 |
| 96 /** | 114 /** |
| 97 * Produce a list of UTF-32BE encoded bytes. By default, this method produces | 115 * Produce a list of UTF-32BE encoded bytes. By default, this method produces |
| 98 * UTF-32BE bytes with no BOM. | 116 * UTF-32BE bytes with no BOM. |
| 99 */ | 117 */ |
| 100 List<int> encodeUtf32be(String str, [bool writeBOM = false]) { | 118 List<int> encodeUtf32be(String str, [bool writeBOM = false]) { |
| 101 List<int> utf32CodeUnits = stringToCodepoints(str); | 119 List<int> utf32CodeUnits = stringToCodepoints(str); |
| 102 List<int> encoding = new List<int>(4 * utf32CodeUnits.length + | 120 List<int> encoding = |
| 103 (writeBOM ? 4 : 0)); | 121 new List<int>(4 * utf32CodeUnits.length + (writeBOM ? 4 : 0)); |
| 104 int i = 0; | 122 int i = 0; |
| 105 if (writeBOM) { | 123 if (writeBOM) { |
| 106 encoding[i++] = 0; | 124 encoding[i++] = 0; |
| 107 encoding[i++] = 0; | 125 encoding[i++] = 0; |
| 108 encoding[i++] = UNICODE_UTF_BOM_HI; | 126 encoding[i++] = UNICODE_UTF_BOM_HI; |
| 109 encoding[i++] = UNICODE_UTF_BOM_LO; | 127 encoding[i++] = UNICODE_UTF_BOM_LO; |
| 110 } | 128 } |
| 111 for (int unit in utf32CodeUnits) { | 129 for (int unit in utf32CodeUnits) { |
| 112 encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK; | 130 encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK; |
| 113 encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK; | 131 encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK; |
| 114 encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK; | 132 encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK; |
| 115 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; | 133 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; |
| 116 } | 134 } |
| 117 return encoding; | 135 return encoding; |
| 118 } | 136 } |
| 119 | 137 |
| 120 /** | 138 /** |
| 121 * Produce a list of UTF-32LE encoded bytes. By default, this method produces | 139 * Produce a list of UTF-32LE encoded bytes. By default, this method produces |
| 122 * UTF-32LE bytes with no BOM. | 140 * UTF-32LE bytes with no BOM. |
| 123 */ | 141 */ |
| 124 List<int> encodeUtf32le(String str, [bool writeBOM = false]) { | 142 List<int> encodeUtf32le(String str, [bool writeBOM = false]) { |
| 125 List<int> utf32CodeUnits = stringToCodepoints(str); | 143 List<int> utf32CodeUnits = stringToCodepoints(str); |
| 126 List<int> encoding = new List<int>(4 * utf32CodeUnits.length + | 144 List<int> encoding = |
| 127 (writeBOM ? 4 : 0)); | 145 new List<int>(4 * utf32CodeUnits.length + (writeBOM ? 4 : 0)); |
| 128 int i = 0; | 146 int i = 0; |
| 129 if (writeBOM) { | 147 if (writeBOM) { |
| 130 encoding[i++] = UNICODE_UTF_BOM_LO; | 148 encoding[i++] = UNICODE_UTF_BOM_LO; |
| 131 encoding[i++] = UNICODE_UTF_BOM_HI; | 149 encoding[i++] = UNICODE_UTF_BOM_HI; |
| 132 encoding[i++] = 0; | 150 encoding[i++] = 0; |
| 133 encoding[i++] = 0; | 151 encoding[i++] = 0; |
| 134 } | 152 } |
| 135 for (int unit in utf32CodeUnits) { | 153 for (int unit in utf32CodeUnits) { |
| 136 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; | 154 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; |
| 137 encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK; | 155 encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK; |
| 138 encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK; | 156 encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK; |
| 139 encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK; | 157 encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK; |
| 140 } | 158 } |
| 141 return encoding; | 159 return encoding; |
| 142 } | 160 } |
| 143 | 161 |
| 144 /** | 162 /** |
| 145 * Identifies whether a List of bytes starts (based on offset) with a | 163 * Identifies whether a List of bytes starts (based on offset) with a |
| 146 * byte-order marker (BOM). | 164 * byte-order marker (BOM). |
| 147 */ | 165 */ |
| 148 bool hasUtf32Bom( | 166 bool hasUtf32Bom(List<int> utf32EncodedBytes, [int offset = 0, int length]) { |
| 149 List<int> utf32EncodedBytes, [int offset = 0, int length]) { | |
| 150 return hasUtf32beBom(utf32EncodedBytes, offset, length) || | 167 return hasUtf32beBom(utf32EncodedBytes, offset, length) || |
| 151 hasUtf32leBom(utf32EncodedBytes, offset, length); | 168 hasUtf32leBom(utf32EncodedBytes, offset, length); |
| 152 } | 169 } |
| 153 | 170 |
| 154 /** | 171 /** |
| 155 * Identifies whether a List of bytes starts (based on offset) with a | 172 * Identifies whether a List of bytes starts (based on offset) with a |
| 156 * big-endian byte-order marker (BOM). | 173 * big-endian byte-order marker (BOM). |
| 157 */ | 174 */ |
| 158 bool hasUtf32beBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) { | 175 bool hasUtf32beBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) { |
| 159 int end = length != null ? offset + length : utf32EncodedBytes.length; | 176 int end = length != null ? offset + length : utf32EncodedBytes.length; |
| 160 return (offset + 4) <= end && | 177 return (offset + 4) <= end && |
| 161 utf32EncodedBytes[offset] == 0 && utf32EncodedBytes[offset + 1] == 0 && | 178 utf32EncodedBytes[offset] == 0 && |
| 179 utf32EncodedBytes[offset + 1] == 0 && |
| 162 utf32EncodedBytes[offset + 2] == UNICODE_UTF_BOM_HI && | 180 utf32EncodedBytes[offset + 2] == UNICODE_UTF_BOM_HI && |
| 163 utf32EncodedBytes[offset + 3] == UNICODE_UTF_BOM_LO; | 181 utf32EncodedBytes[offset + 3] == UNICODE_UTF_BOM_LO; |
| 164 } | 182 } |
| 165 | 183 |
| 166 /** | 184 /** |
| 167 * Identifies whether a List of bytes starts (based on offset) with a | 185 * Identifies whether a List of bytes starts (based on offset) with a |
| 168 * little-endian byte-order marker (BOM). | 186 * little-endian byte-order marker (BOM). |
| 169 */ | 187 */ |
| 170 bool hasUtf32leBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) { | 188 bool hasUtf32leBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) { |
| 171 int end = length != null ? offset + length : utf32EncodedBytes.length; | 189 int end = length != null ? offset + length : utf32EncodedBytes.length; |
| 172 return (offset + 4) <= end && | 190 return (offset + 4) <= end && |
| 173 utf32EncodedBytes[offset] == UNICODE_UTF_BOM_LO && | 191 utf32EncodedBytes[offset] == UNICODE_UTF_BOM_LO && |
| 174 utf32EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI && | 192 utf32EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI && |
| 175 utf32EncodedBytes[offset + 2] == 0 && utf32EncodedBytes[offset + 3] == 0; | 193 utf32EncodedBytes[offset + 2] == 0 && |
| 194 utf32EncodedBytes[offset + 3] == 0; |
| 176 } | 195 } |
| 177 | 196 |
| 178 typedef Utf32BytesDecoder Utf32BytesDecoderProvider(); | 197 typedef Utf32BytesDecoder Utf32BytesDecoderProvider(); |
| 179 | 198 |
| 180 /** | 199 /** |
| 181 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type | 200 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type |
| 182 * provides an iterator on demand and the iterator will only translate bytes | 201 * provides an iterator on demand and the iterator will only translate bytes |
| 183 * as requested by the user of the iterator. (Note: results are not cached.) | 202 * as requested by the user of the iterator. (Note: results are not cached.) |
| 184 */ | 203 */ |
| 185 // TODO(floitsch): Consider removing the extend and switch to implements since | 204 // TODO(floitsch): Consider removing the extend and switch to implements since |
| (...skipping 11 matching lines...) Expand all Loading... |
| 197 */ | 216 */ |
| 198 abstract class Utf32BytesDecoder implements ListRangeIterator { | 217 abstract class Utf32BytesDecoder implements ListRangeIterator { |
| 199 // TODO(kevmoo): should this field be private? | 218 // TODO(kevmoo): should this field be private? |
| 200 final ListRangeIterator utf32EncodedBytesIterator; | 219 final ListRangeIterator utf32EncodedBytesIterator; |
| 201 final int replacementCodepoint; | 220 final int replacementCodepoint; |
| 202 int _current = null; | 221 int _current = null; |
| 203 | 222 |
| 204 Utf32BytesDecoder._fromListRangeIterator( | 223 Utf32BytesDecoder._fromListRangeIterator( |
| 205 this.utf32EncodedBytesIterator, this.replacementCodepoint); | 224 this.utf32EncodedBytesIterator, this.replacementCodepoint); |
| 206 | 225 |
| 207 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ | 226 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, |
| 208 int offset = 0, int length, | 227 [int offset = 0, |
| 228 int length, |
| 209 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 229 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 210 if (length == null) { | 230 if (length == null) { |
| 211 length = utf32EncodedBytes.length - offset; | 231 length = utf32EncodedBytes.length - offset; |
| 212 } | 232 } |
| 213 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { | 233 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
| 214 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, | 234 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
| 215 false, replacementCodepoint); | 235 false, replacementCodepoint); |
| 216 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { | 236 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
| 217 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, | 237 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
| 218 false, replacementCodepoint); | 238 false, replacementCodepoint); |
| 219 } else { | 239 } else { |
| 220 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false, | 240 return new Utf32beBytesDecoder( |
| 221 replacementCodepoint); | 241 utf32EncodedBytes, offset, length, false, replacementCodepoint); |
| 222 } | 242 } |
| 223 } | 243 } |
| 224 | 244 |
| 225 List<int> decodeRest() { | 245 List<int> decodeRest() { |
| 226 List<int> codeunits = new List<int>(remaining); | 246 List<int> codeunits = new List<int>(remaining); |
| 227 int i = 0; | 247 int i = 0; |
| 228 while (moveNext()) { | 248 while (moveNext()) { |
| 229 codeunits[i++] = current; | 249 codeunits[i++] = current; |
| 230 } | 250 } |
| 231 return codeunits; | 251 return codeunits; |
| 232 } | 252 } |
| 233 | 253 |
| 234 int get current => _current; | 254 int get current => _current; |
| 235 | 255 |
| 236 bool moveNext() { | 256 bool moveNext() { |
| 237 _current = null; | 257 _current = null; |
| 238 int remaining = utf32EncodedBytesIterator.remaining; | 258 int remaining = utf32EncodedBytesIterator.remaining; |
| 239 if (remaining == 0) { | 259 if (remaining == 0) { |
| 240 _current = null; | 260 _current = null; |
| 241 return false; | 261 return false; |
| 242 } | 262 } |
| 243 if (remaining < 4) { | 263 if (remaining < 4) { |
| 244 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); | 264 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); |
| 245 if (replacementCodepoint != null) { | 265 if (replacementCodepoint != null) { |
| 246 _current = replacementCodepoint; | 266 _current = replacementCodepoint; |
| 247 return true; | 267 return true; |
| 248 } else { | 268 } else { |
| 249 throw new ArgumentError( | 269 throw new ArgumentError( |
| 250 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); | 270 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); |
| 251 } | 271 } |
| 252 } | 272 } |
| 253 int codepoint = decode(); | 273 int codepoint = decode(); |
| 254 if (_validCodepoint(codepoint)) { | 274 if (_validCodepoint(codepoint)) { |
| 255 _current = codepoint; | 275 _current = codepoint; |
| 256 return true; | 276 return true; |
| 257 } else if (replacementCodepoint != null) { | 277 } else if (replacementCodepoint != null) { |
| (...skipping 18 matching lines...) Expand all Loading... |
| 276 } | 296 } |
| 277 | 297 |
| 278 int decode(); | 298 int decode(); |
| 279 } | 299 } |
| 280 | 300 |
| 281 /** | 301 /** |
| 282 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes | 302 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes |
| 283 * to produce the unicode codepoint. | 303 * to produce the unicode codepoint. |
| 284 */ | 304 */ |
| 285 class Utf32beBytesDecoder extends Utf32BytesDecoder { | 305 class Utf32beBytesDecoder extends Utf32BytesDecoder { |
| 286 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, | 306 Utf32beBytesDecoder(List<int> utf32EncodedBytes, |
| 287 int length, bool stripBom = true, | 307 [int offset = 0, |
| 288 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 308 int length, |
| 289 super._fromListRangeIterator( | 309 bool stripBom = true, |
| 290 (new ListRange(utf32EncodedBytes, offset, length)).iterator, | 310 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) |
| 291 replacementCodepoint) { | 311 : super._fromListRangeIterator( |
| 312 (new ListRange(utf32EncodedBytes, offset, length)).iterator, |
| 313 replacementCodepoint) { |
| 292 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { | 314 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
| 293 skip(); | 315 skip(); |
| 294 } | 316 } |
| 295 } | 317 } |
| 296 | 318 |
| 297 int decode() { | 319 int decode() { |
| 298 utf32EncodedBytesIterator.moveNext(); | 320 utf32EncodedBytesIterator.moveNext(); |
| 299 int value = utf32EncodedBytesIterator.current; | 321 int value = utf32EncodedBytesIterator.current; |
| 300 utf32EncodedBytesIterator.moveNext(); | 322 utf32EncodedBytesIterator.moveNext(); |
| 301 value = (value << 8) + utf32EncodedBytesIterator.current; | 323 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 302 utf32EncodedBytesIterator.moveNext(); | 324 utf32EncodedBytesIterator.moveNext(); |
| 303 value = (value << 8) + utf32EncodedBytesIterator.current; | 325 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 304 utf32EncodedBytesIterator.moveNext(); | 326 utf32EncodedBytesIterator.moveNext(); |
| 305 value = (value << 8) + utf32EncodedBytesIterator.current; | 327 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 306 return value; | 328 return value; |
| 307 } | 329 } |
| 308 } | 330 } |
| 309 | 331 |
| 310 /** | 332 /** |
| 311 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes | 333 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes |
| 312 * to produce the unicode codepoint. | 334 * to produce the unicode codepoint. |
| 313 */ | 335 */ |
| 314 class Utf32leBytesDecoder extends Utf32BytesDecoder { | 336 class Utf32leBytesDecoder extends Utf32BytesDecoder { |
| 315 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, | 337 Utf32leBytesDecoder(List<int> utf32EncodedBytes, |
| 316 int length, bool stripBom = true, | 338 [int offset = 0, |
| 317 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 339 int length, |
| 318 super._fromListRangeIterator( | 340 bool stripBom = true, |
| 319 (new ListRange(utf32EncodedBytes, offset, length)).iterator, | 341 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) |
| 320 replacementCodepoint) { | 342 : super._fromListRangeIterator( |
| 343 (new ListRange(utf32EncodedBytes, offset, length)).iterator, |
| 344 replacementCodepoint) { |
| 321 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { | 345 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
| 322 skip(); | 346 skip(); |
| 323 } | 347 } |
| 324 } | 348 } |
| 325 | 349 |
| 326 int decode() { | 350 int decode() { |
| 327 utf32EncodedBytesIterator.moveNext(); | 351 utf32EncodedBytesIterator.moveNext(); |
| 328 int value = utf32EncodedBytesIterator.current; | 352 int value = utf32EncodedBytesIterator.current; |
| 329 utf32EncodedBytesIterator.moveNext(); | 353 utf32EncodedBytesIterator.moveNext(); |
| 330 value += (utf32EncodedBytesIterator.current << 8); | 354 value += (utf32EncodedBytesIterator.current << 8); |
| 331 utf32EncodedBytesIterator.moveNext(); | 355 utf32EncodedBytesIterator.moveNext(); |
| 332 value += (utf32EncodedBytesIterator.current << 16); | 356 value += (utf32EncodedBytesIterator.current << 16); |
| 333 utf32EncodedBytesIterator.moveNext(); | 357 utf32EncodedBytesIterator.moveNext(); |
| 334 value += (utf32EncodedBytesIterator.current << 24); | 358 value += (utf32EncodedBytesIterator.current << 24); |
| 335 return value; | 359 return value; |
| 336 } | 360 } |
| 337 } | 361 } |
| 338 | 362 |
| 339 bool _validCodepoint(int codepoint) { | 363 bool _validCodepoint(int codepoint) { |
| 340 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || | 364 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || |
| 341 (codepoint > UNICODE_UTF16_RESERVED_HI && | 365 (codepoint > UNICODE_UTF16_RESERVED_HI && |
| 342 codepoint < UNICODE_VALID_RANGE_MAX); | 366 codepoint < UNICODE_VALID_RANGE_MAX); |
| 343 } | 367 } |
| OLD | NEW |