| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 /** | 5 /** |
| 6 * Provide a list of Unicode codepoints for a given string. | 6 * Provide a list of Unicode codepoints for a given string. |
| 7 */ | 7 */ |
| 8 List<int> stringToCodepoints(String str) { | 8 List<int> stringToCodepoints(String str) { |
| 9 List<int> codepoints; | 9 List<int> codepoints; |
| 10 // TODO _is16BitCodeUnit() is used to work around a bug with dart2js | 10 // TODO _is16BitCodeUnit() is used to work around a bug with dart2js |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 99 } else if (value > UNICODE_PLANE_ONE_MAX && | 99 } else if (value > UNICODE_PLANE_ONE_MAX && |
| 100 value <= UNICODE_VALID_RANGE_MAX) { | 100 value <= UNICODE_VALID_RANGE_MAX) { |
| 101 int base = value - UNICODE_UTF16_OFFSET; | 101 int base = value - UNICODE_UTF16_OFFSET; |
| 102 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE + | 102 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE + |
| 103 ((base & UNICODE_UTF16_HI_MASK) >> 10); | 103 ((base & UNICODE_UTF16_HI_MASK) >> 10); |
| 104 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE + | 104 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE + |
| 105 (base & UNICODE_UTF16_LO_MASK); | 105 (base & UNICODE_UTF16_LO_MASK); |
| 106 } else if (replacementCodepoint != null) { | 106 } else if (replacementCodepoint != null) { |
| 107 codeUnitsBuffer[j++] = replacementCodepoint; | 107 codeUnitsBuffer[j++] = replacementCodepoint; |
| 108 } else { | 108 } else { |
| 109 throw new IllegalArgumentException("Invalid encoding"); | 109 throw new ArgumentError("Invalid encoding"); |
| 110 } | 110 } |
| 111 } | 111 } |
| 112 return codeUnitsBuffer; | 112 return codeUnitsBuffer; |
| 113 } | 113 } |
| 114 | 114 |
| 115 /** | 115 /** |
| 116 * Decodes the utf16 codeunits to codepoints. | 116 * Decodes the utf16 codeunits to codepoints. |
| 117 */ | 117 */ |
| 118 List<int> _utf16CodeUnitsToCodepoints( | 118 List<int> _utf16CodeUnitsToCodepoints( |
| 119 List<int> utf16CodeUnits, [int offset = 0, int length, | 119 List<int> utf16CodeUnits, [int offset = 0, int length, |
| (...skipping 12 matching lines...) Expand all Loading... |
| 132 } else { | 132 } else { |
| 133 List<int> codepointTrunc = new List<int>(i); | 133 List<int> codepointTrunc = new List<int>(i); |
| 134 codepointTrunc.setRange(0, i, codepoints); | 134 codepointTrunc.setRange(0, i, codepoints); |
| 135 return codepointTrunc; | 135 return codepointTrunc; |
| 136 } | 136 } |
| 137 } | 137 } |
| 138 | 138 |
| 139 /** | 139 /** |
| 140 * An Iterator<int> that decodes UTF-16 code units into Unicode codepoints, merging each valid surrogate pair into a single codepoint. | 140 * An Iterator<int> that decodes UTF-16 code units into Unicode codepoints, merging each valid surrogate pair into a single codepoint. |
| 141 * Bad input (such as a negative value, or a first surrogate unit that is not followed by a valid second unit) yields replacementCodepoint, which the unnamed constructor defaults to UNICODE_REPLACEMENT_CHARACTER_CODEPOINT. | 141 * Bad input (such as a negative value, or a first surrogate unit that is not followed by a valid second unit) yields replacementCodepoint, which the unnamed constructor defaults to UNICODE_REPLACEMENT_CHARACTER_CODEPOINT. |
| 142 * Set replacementCodepoint to null to throw an IllegalArgumentException | 142 * Set replacementCodepoint to null to throw an ArgumentError |
| 143 * (with the iterator position in its message) rather than replace the bad value. | 143 * (with the iterator position in its message) rather than replace the bad value. |
| 144 */ | 144 */ |
| 145 class Utf16CodeUnitDecoder implements Iterator<int> { | 145 class Utf16CodeUnitDecoder implements Iterator<int> { |
| 146 final _ListRangeIterator utf16CodeUnitIterator; | 146 final _ListRangeIterator utf16CodeUnitIterator; |
| 147 final int replacementCodepoint; | 147 final int replacementCodepoint; |
| 148 | 148 |
| 149 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length, | 149 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length, |
| 150 int this.replacementCodepoint = | 150 int this.replacementCodepoint = |
| 151 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 151 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 152 utf16CodeUnitIterator = (new _ListRange(utf16CodeUnits, offset, length)) | 152 utf16CodeUnitIterator = (new _ListRange(utf16CodeUnits, offset, length)) |
| 153 .iterator(); | 153 .iterator(); |
| 154 | 154 |
| 155 Utf16CodeUnitDecoder.fromListRangeIterator( | 155 Utf16CodeUnitDecoder.fromListRangeIterator( |
| 156 _ListRangeIterator this.utf16CodeUnitIterator, | 156 _ListRangeIterator this.utf16CodeUnitIterator, |
| 157 int this.replacementCodepoint); | 157 int this.replacementCodepoint); |
| 158 | 158 |
| 159 Iterator<int> iterator() => this; | 159 Iterator<int> iterator() => this; |
| 160 | 160 |
| 161 bool hasNext() => utf16CodeUnitIterator.hasNext(); | 161 bool hasNext() => utf16CodeUnitIterator.hasNext(); |
| 162 | 162 |
| 163 int next() { | 163 int next() { |
| 164 int value = utf16CodeUnitIterator.next(); | 164 int value = utf16CodeUnitIterator.next(); |
| 165 if (value < 0) { | 165 if (value < 0) { |
| 166 if (replacementCodepoint != null) { | 166 if (replacementCodepoint != null) { |
| 167 return replacementCodepoint; | 167 return replacementCodepoint; |
| 168 } else { | 168 } else { |
| 169 throw new IllegalArgumentException( | 169 throw new ArgumentError( |
| 170 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | 170 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); |
| 171 } | 171 } |
| 172 } else if (value < UNICODE_UTF16_RESERVED_LO || | 172 } else if (value < UNICODE_UTF16_RESERVED_LO || |
| 173 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { | 173 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { |
| 174 // outside the reserved (surrogate) range: the code unit is already the codepoint, so transfer it directly | 174 // outside the reserved (surrogate) range: the code unit is already the codepoint, so transfer it directly |
| 175 return value; | 175 return value; |
| 176 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE && | 176 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE && |
| 177 utf16CodeUnitIterator.hasNext()) { | 177 utf16CodeUnitIterator.hasNext()) { |
| 178 // first unit of a surrogate pair with more input available: consume the next unit and merge the pair; if the next unit is not a valid second unit it is dropped, unless it is itself a first unit, in which case the iterator backs up so it is decoded again | 178 // first unit of a surrogate pair with more input available: consume the next unit and merge the pair; if the next unit is not a valid second unit it is dropped, unless it is itself a first unit, in which case the iterator backs up so it is decoded again |
| 179 int nextValue = utf16CodeUnitIterator.next(); | 179 int nextValue = utf16CodeUnitIterator.next(); |
| 180 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE && | 180 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE && |
| 181 nextValue <= UNICODE_UTF16_RESERVED_HI) { | 181 nextValue <= UNICODE_UTF16_RESERVED_HI) { |
| 182 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10; | 182 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10; |
| 183 value += UNICODE_UTF16_OFFSET + | 183 value += UNICODE_UTF16_OFFSET + |
| 184 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE); | 184 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE); |
| 185 return value; | 185 return value; |
| 186 } else { | 186 } else { |
| 187 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE && | 187 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE && |
| 188 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) { | 188 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) { |
| 189 utf16CodeUnitIterator.backup(); | 189 utf16CodeUnitIterator.backup(); |
| 190 } | 190 } |
| 191 if (replacementCodepoint != null) { | 191 if (replacementCodepoint != null) { |
| 192 return replacementCodepoint; | 192 return replacementCodepoint; |
| 193 } else { | 193 } else { |
| 194 throw new IllegalArgumentException( | 194 throw new ArgumentError( |
| 195 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | 195 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); |
| 196 } | 196 } |
| 197 } | 197 } |
| 198 } else if (replacementCodepoint != null) { | 198 } else if (replacementCodepoint != null) { |
| 199 return replacementCodepoint; | 199 return replacementCodepoint; |
| 200 } else { | 200 } else { |
| 201 throw new IllegalArgumentException( | 201 throw new ArgumentError( |
| 202 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | 202 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); |
| 203 } | 203 } |
| 204 } | 204 } |
| 205 } | 205 } |
| 206 | 206 |
| 207 /** | 207 /** |
| 208 * _ListRange is an internal type used to create a lightweight Iterable on a | 208 * _ListRange is an internal type used to create a lightweight Iterable on a |
| 209 * range within a source list. DO NOT MODIFY the underlying list while | 209 * range within a source list. DO NOT MODIFY the underlying list while |
| 210 * iterating over it. The results of doing so are undefined. | 210 * iterating over it. The results of doing so are undefined. |
| 211 */ | 211 */ |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 266 _offset -= by; | 266 _offset -= by; |
| 267 } | 267 } |
| 268 | 268 |
| 269 int get remaining => _end - _offset; | 269 int get remaining => _end - _offset; |
| 270 | 270 |
| 271 void skip([int count = 1]) { | 271 void skip([int count = 1]) { |
| 272 _offset += count; | 272 _offset += count; |
| 273 } | 273 } |
| 274 } | 274 } |
| 275 | 275 |
| OLD | NEW |