| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501). | 5 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501). |
| 6 /** | 6 /** |
| 7 * Provide a list of Unicode codepoints for a given string. | 7 * Provide a list of Unicode codepoints for a given string. |
| 8 */ | 8 */ |
| 9 List<int> stringToCodepoints(String str) { | 9 List<int> stringToCodepoints(String str) { |
| 10 // Note: str.charCodes gives us 16-bit code units on all Dart implementations. | 10 // Note: str.charCodes gives us 16-bit code units on all Dart implementations. |
| (...skipping 50 matching lines...) |
| 61 const int UNICODE_UTF16_OFFSET = 0x10000; | 61 const int UNICODE_UTF16_OFFSET = 0x10000; |
| 62 const int UNICODE_UTF16_SURROGATE_UNIT_0_BASE = 0xd800; | 62 const int UNICODE_UTF16_SURROGATE_UNIT_0_BASE = 0xd800; |
| 63 const int UNICODE_UTF16_SURROGATE_UNIT_1_BASE = 0xdc00; | 63 const int UNICODE_UTF16_SURROGATE_UNIT_1_BASE = 0xdc00; |
| 64 const int UNICODE_UTF16_HI_MASK = 0xffc00; | 64 const int UNICODE_UTF16_HI_MASK = 0xffc00; |
| 65 const int UNICODE_UTF16_LO_MASK = 0x3ff; | 65 const int UNICODE_UTF16_LO_MASK = 0x3ff; |
| 66 | 66 |
| 67 /** | 67 /** |
| 68 * Encode code points as UTF16 code units. | 68 * Encode code points as UTF16 code units. |
| 69 */ | 69 */ |
| 70 List<int> _codepointsToUtf16CodeUnits( | 70 List<int> _codepointsToUtf16CodeUnits( |
| 71 List<int> codepoints, [int offset = 0, int length, | 71 List<int> codepoints, |
| 72 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 72 [int offset = 0, |
| 73 int length, |
| 74 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 73 | 75 |
| 74 _ListRange listRange = new _ListRange(codepoints, offset, length); | 76 _ListRange listRange = new _ListRange(codepoints, offset, length); |
| 75 int encodedLength = 0; | 77 int encodedLength = 0; |
| 76 for (int value in listRange) { | 78 for (int value in listRange) { |
| 77 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) || | 79 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) || |
| 78 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { | 80 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { |
| 79 encodedLength++; | 81 encodedLength++; |
| 80 } else if (value > UNICODE_PLANE_ONE_MAX && | 82 } else if (value > UNICODE_PLANE_ONE_MAX && |
| 81 value <= UNICODE_VALID_RANGE_MAX) { | 83 value <= UNICODE_VALID_RANGE_MAX) { |
| 82 encodedLength += 2; | 84 encodedLength += 2; |
| (...skipping 24 matching lines...) |
| 107 return codeUnitsBuffer; | 109 return codeUnitsBuffer; |
| 108 } | 110 } |
| 109 | 111 |
| 110 /** | 112 /** |
| 111 * Decodes the utf16 codeunits to codepoints. | 113 * Decodes the utf16 codeunits to codepoints. |
| 112 */ | 114 */ |
| 113 List<int> _utf16CodeUnitsToCodepoints( | 115 List<int> _utf16CodeUnitsToCodepoints( |
| 114 List<int> utf16CodeUnits, [int offset = 0, int length, | 116 List<int> utf16CodeUnits, [int offset = 0, int length, |
| 115 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 117 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 116 _ListRangeIterator source = | 118 _ListRangeIterator source = |
| 117 (new _ListRange(utf16CodeUnits, offset, length)).iterator(); | 119 (new _ListRange(utf16CodeUnits, offset, length)).iterator; |
| 118 Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder | 120 Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder |
| 119 .fromListRangeIterator(source, replacementCodepoint); | 121 .fromListRangeIterator(source, replacementCodepoint); |
| 120 List<int> codepoints = new List<int>(source.remaining); | 122 List<int> codepoints = new List<int>(source.remaining); |
| 121 int i = 0; | 123 int i = 0; |
| 122 while (decoder.hasNext) { | 124 while (decoder.moveNext()) { |
| 123 codepoints[i++] = decoder.next(); | 125 codepoints[i++] = decoder.current; |
| 124 } | 126 } |
| 125 if (i == codepoints.length) { | 127 if (i == codepoints.length) { |
| 126 return codepoints; | 128 return codepoints; |
| 127 } else { | 129 } else { |
| 128 List<int> codepointTrunc = new List<int>(i); | 130 List<int> codepointTrunc = new List<int>(i); |
| 129 codepointTrunc.setRange(0, i, codepoints); | 131 codepointTrunc.setRange(0, i, codepoints); |
| 130 return codepointTrunc; | 132 return codepointTrunc; |
| 131 } | 133 } |
| 132 } | 134 } |
| 133 | 135 |
| 134 /** | 136 /** |
| 135 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units. | 137 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units. |
| 136 * The parameters can override the default Unicode replacement character. Set | 138 * The parameters can override the default Unicode replacement character. Set |
| 137 * the replacementCodepoint to null to throw an ArgumentError | 139 * the replacementCodepoint to null to throw an ArgumentError |
| 138 * rather than replace the bad value. | 140 * rather than replace the bad value. |
| 139 */ | 141 */ |
| 140 class Utf16CodeUnitDecoder implements Iterator<int> { | 142 class Utf16CodeUnitDecoder implements Iterator<int> { |
| 141 final _ListRangeIterator utf16CodeUnitIterator; | 143 final _ListRangeIterator utf16CodeUnitIterator; |
| 142 final int replacementCodepoint; | 144 final int replacementCodepoint; |
| 145 int _current = -1; |
| 143 | 146 |
| 144 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length, | 147 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length, |
| 145 int this.replacementCodepoint = | 148 int this.replacementCodepoint = |
| 146 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 149 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 147 utf16CodeUnitIterator = (new _ListRange(utf16CodeUnits, offset, length)) | 150 utf16CodeUnitIterator = |
| 148 .iterator(); | 151 (new _ListRange(utf16CodeUnits, offset, length)).iterator; |
| 149 | 152 |
| 150 Utf16CodeUnitDecoder.fromListRangeIterator( | 153 Utf16CodeUnitDecoder.fromListRangeIterator( |
| 151 _ListRangeIterator this.utf16CodeUnitIterator, | 154 _ListRangeIterator this.utf16CodeUnitIterator, |
| 152 int this.replacementCodepoint); | 155 int this.replacementCodepoint); |
| 153 | 156 |
| 154 Iterator<int> iterator() => this; | 157 Iterator<int> get iterator => this; |
| 155 | 158 |
| 156 bool get hasNext => utf16CodeUnitIterator.hasNext; | 159 int get current { |
| 160 if (_current == -1) { |
| 161 // TODO(floitsch): bad error message. |
| 162 throw new StateError("No more elements"); |
| 163 } |
| 164 return _current; |
| 165 } |
| 157 | 166 |
| 158 int next() { | 167 bool moveNext() { |
| 159 int value = utf16CodeUnitIterator.next(); | 168 _current = -1; |
| 169 if (!utf16CodeUnitIterator.moveNext()) return false; |
| 170 |
| 171 int value = utf16CodeUnitIterator.current; |
| 160 if (value < 0) { | 172 if (value < 0) { |
| 161 if (replacementCodepoint != null) { | 173 if (replacementCodepoint != null) { |
| 162 return replacementCodepoint; | 174 _current = replacementCodepoint; |
| 163 } else { | 175 } else { |
| 164 throw new ArgumentError( | 176 throw new ArgumentError( |
| 165 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | 177 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); |
| 166 } | 178 } |
| 167 } else if (value < UNICODE_UTF16_RESERVED_LO || | 179 } else if (value < UNICODE_UTF16_RESERVED_LO || |
| 168 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { | 180 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { |
| 169 // transfer directly | 181 // transfer directly |
| 170 return value; | 182 _current = value; |
| 171 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE && | 183 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE && |
| 172 utf16CodeUnitIterator.hasNext) { | 184 utf16CodeUnitIterator.moveNext()) { |
| 173 // merge surrogate pair | 185 // merge surrogate pair |
| 174 int nextValue = utf16CodeUnitIterator.next(); | 186 int nextValue = utf16CodeUnitIterator.current; |
| 175 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE && | 187 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE && |
| 176 nextValue <= UNICODE_UTF16_RESERVED_HI) { | 188 nextValue <= UNICODE_UTF16_RESERVED_HI) { |
| 177 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10; | 189 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10; |
| 178 value += UNICODE_UTF16_OFFSET + | 190 value += UNICODE_UTF16_OFFSET + |
| 179 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE); | 191 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE); |
| 180 return value; | 192 _current = value; |
| 181 } else { | 193 } else { |
| 182 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE && | 194 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE && |
| 183 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) { | 195 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) { |
| 184 utf16CodeUnitIterator.backup(); | 196 utf16CodeUnitIterator.backup(); |
| 185 } | 197 } |
| 186 if (replacementCodepoint != null) { | 198 if (replacementCodepoint != null) { |
| 187 return replacementCodepoint; | 199 _current = replacementCodepoint; |
| 188 } else { | 200 } else { |
| 189 throw new ArgumentError( | 201 throw new ArgumentError( |
| 190 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | 202 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); |
| 191 } | 203 } |
| 192 } | 204 } |
| 193 } else if (replacementCodepoint != null) { | 205 } else if (replacementCodepoint != null) { |
| 194 return replacementCodepoint; | 206 _current = replacementCodepoint; |
| 195 } else { | 207 } else { |
| 196 throw new ArgumentError( | 208 throw new ArgumentError( |
| 197 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | 209 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); |
| 198 } | 210 } |
| 211 return true; |
| 199 } | 212 } |
| 200 } | 213 } |
| 201 | 214 |
| 202 /** | 215 /** |
| 203 * _ListRange is an internal type used to create a lightweight Iterable on a | 216 * _ListRange is an internal type used to create a lightweight Iterable on a |
| 204 * range within a source list. DO NOT MODIFY the underlying list while | 217 * range within a source list. DO NOT MODIFY the underlying list while |
| 205 * iterating over it. The results of doing so are undefined. | 218 * iterating over it. The results of doing so are undefined. |
| 206 */ | 219 */ |
| 207 class _ListRange extends Iterable { | 220 class _ListRange extends Iterable { |
| 208 final List _source; | 221 final List _source; |
| 209 final int _offset; | 222 final int _offset; |
| 210 final int _length; | 223 final int _length; |
| 211 | 224 |
| 212 _ListRange(source, [offset = 0, length]) : | 225 _ListRange(source, [offset = 0, length]) : |
| 213 this._source = source, | 226 this._source = source, |
| 214 this._offset = offset, | 227 this._offset = offset, |
| 215 this._length = (length == null ? source.length - offset : length) { | 228 this._length = (length == null ? source.length - offset : length) { |
| 216 if (_offset < 0 || _offset > _source.length) { | 229 if (_offset < 0 || _offset > _source.length) { |
| 217 throw new RangeError.value(_offset); | 230 throw new RangeError.value(_offset); |
| 218 } | 231 } |
| 219 if (_length != null && (_length < 0)) { | 232 if (_length != null && (_length < 0)) { |
| 220 throw new RangeError.value(_length); | 233 throw new RangeError.value(_length); |
| 221 } | 234 } |
| 222 if (_length + _offset > _source.length) { | 235 if (_length + _offset > _source.length) { |
| 223 throw new RangeError.value(_length + _offset); | 236 throw new RangeError.value(_length + _offset); |
| 224 } | 237 } |
| 225 } | 238 } |
| 226 | 239 |
| 227 _ListRangeIterator iterator() => | 240 _ListRangeIterator get iterator => |
| 228 new _ListRangeIteratorImpl(_source, _offset, _offset + _length); | 241 new _ListRangeIteratorImpl(_source, _offset, _offset + _length); |
| 229 | 242 |
| 230 int get length => _length; | 243 int get length => _length; |
| 231 } | 244 } |
| 232 | 245 |
| 233 /** | 246 /** |
| 234 * The _ListRangeIterator provides more capabilities than a standard iterator, | 247 * The _ListRangeIterator provides more capabilities than a standard iterator, |
| 235 * including the ability to get the current position, count remaining items, | 248 * including the ability to get the current position, count remaining items, |
| 236 * and move forward/backward within the iterator. | 249 * and move forward/backward within the iterator. |
| 237 */ | 250 */ |
| 238 abstract class _ListRangeIterator implements Iterator<int> { | 251 abstract class _ListRangeIterator implements Iterator<int> { |
| 239 bool hasNext; | 252 bool moveNext(); |
| 240 int next(); | 253 int get current; |
| 241 int get position; | 254 int get position; |
| 242 void backup([by]); | 255 void backup([by]); |
| 243 int get remaining; | 256 int get remaining; |
| 244 void skip([count]); | 257 void skip([count]); |
| 245 } | 258 } |
| 246 | 259 |
| 247 class _ListRangeIteratorImpl implements _ListRangeIterator { | 260 class _ListRangeIteratorImpl implements _ListRangeIterator { |
| 248 final List<int> _source; | 261 final List<int> _source; |
| 249 int _offset; | 262 int _offset; |
| 250 final int _end; | 263 final int _end; |
| 251 | 264 |
| 252 _ListRangeIteratorImpl(this._source, this._offset, this._end); | 265 _ListRangeIteratorImpl(this._source, int offset, this._end) |
| 266 : _offset = offset - 1; |
| 253 | 267 |
| 254 bool get hasNext => _offset < _end; | 268 int get current => _source[_offset]; |
| 255 | 269 |
| 256 int next() => _source[_offset++]; | 270 bool moveNext() => ++_offset < _end; |
| 257 | 271 |
| 258 int get position => _offset; | 272 int get position => _offset; |
| 259 | 273 |
| 260 void backup([int by = 1]) { | 274 void backup([int by = 1]) { |
| 261 _offset -= by; | 275 _offset -= by; |
| 262 } | 276 } |
| 263 | 277 |
| 264 int get remaining => _end - _offset; | 278 int get remaining => _end - _offset - 1; |
| 265 | 279 |
| 266 void skip([int count = 1]) { | 280 void skip([int count = 1]) { |
| 267 _offset += count; | 281 _offset += count; |
| 268 } | 282 } |
| 269 } | 283 } |
| 270 | 284 |
| OLD | NEW |