OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 /** | 5 /** |
6 * Provide a list of Unicode codepoints for a given string. | 6 * Provide a list of Unicode codepoints for a given string. |
7 */ | 7 */ |
8 List<int> stringToCodepoints(String str) { | 8 List<int> stringToCodepoints(String str) { |
9 List<int> codepoints; | 9 List<int> codepoints; |
10 // TODO _is16BitCodeUnit() is used to work around a bug with dart2js | 10 // TODO _is16BitCodeUnit() is used to work around a bug with dart2js |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
99 } else if (value > UNICODE_PLANE_ONE_MAX && | 99 } else if (value > UNICODE_PLANE_ONE_MAX && |
100 value <= UNICODE_VALID_RANGE_MAX) { | 100 value <= UNICODE_VALID_RANGE_MAX) { |
101 int base = value - UNICODE_UTF16_OFFSET; | 101 int base = value - UNICODE_UTF16_OFFSET; |
102 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE + | 102 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE + |
103 ((base & UNICODE_UTF16_HI_MASK) >> 10); | 103 ((base & UNICODE_UTF16_HI_MASK) >> 10); |
104 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE + | 104 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE + |
105 (base & UNICODE_UTF16_LO_MASK); | 105 (base & UNICODE_UTF16_LO_MASK); |
106 } else if (replacementCodepoint != null) { | 106 } else if (replacementCodepoint != null) { |
107 codeUnitsBuffer[j++] = replacementCodepoint; | 107 codeUnitsBuffer[j++] = replacementCodepoint; |
108 } else { | 108 } else { |
109 throw new IllegalArgumentException("Invalid encoding"); | 109 throw new ArgumentError("Invalid encoding"); |
110 } | 110 } |
111 } | 111 } |
112 return codeUnitsBuffer; | 112 return codeUnitsBuffer; |
113 } | 113 } |
114 | 114 |
115 /** | 115 /** |
116 * Decodes the utf16 codeunits to codepoints. | 116 * Decodes the utf16 codeunits to codepoints. |
117 */ | 117 */ |
118 List<int> _utf16CodeUnitsToCodepoints( | 118 List<int> _utf16CodeUnitsToCodepoints( |
119 List<int> utf16CodeUnits, [int offset = 0, int length, | 119 List<int> utf16CodeUnits, [int offset = 0, int length, |
(...skipping 12 matching lines...) Expand all Loading... |
132 } else { | 132 } else { |
133 List<int> codepointTrunc = new List<int>(i); | 133 List<int> codepointTrunc = new List<int>(i); |
134 codepointTrunc.setRange(0, i, codepoints); | 134 codepointTrunc.setRange(0, i, codepoints); |
135 return codepointTrunc; | 135 return codepointTrunc; |
136 } | 136 } |
137 } | 137 } |
138 | 138 |
139 /** | 139 /** |
140 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units. | 140 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units. |
141 * The parameters can override the default Unicode replacement character. Set | 141 * The parameters can override the default Unicode replacement character. Set |
142 * the replacementCodepoint to null to throw an IllegalArgumentException | 142 * the replacementCodepoint to null to throw an ArgumentError |
143 * rather than replace the bad value. | 143 * rather than replace the bad value. |
144 */ | 144 */ |
145 class Utf16CodeUnitDecoder implements Iterator<int> { | 145 class Utf16CodeUnitDecoder implements Iterator<int> { |
146 final _ListRangeIterator utf16CodeUnitIterator; | 146 final _ListRangeIterator utf16CodeUnitIterator; |
147 final int replacementCodepoint; | 147 final int replacementCodepoint; |
148 | 148 |
149 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length, | 149 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length, |
150 int this.replacementCodepoint = | 150 int this.replacementCodepoint = |
151 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 151 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
152 utf16CodeUnitIterator = (new _ListRange(utf16CodeUnits, offset, length)) | 152 utf16CodeUnitIterator = (new _ListRange(utf16CodeUnits, offset, length)) |
153 .iterator(); | 153 .iterator(); |
154 | 154 |
155 Utf16CodeUnitDecoder.fromListRangeIterator( | 155 Utf16CodeUnitDecoder.fromListRangeIterator( |
156 _ListRangeIterator this.utf16CodeUnitIterator, | 156 _ListRangeIterator this.utf16CodeUnitIterator, |
157 int this.replacementCodepoint); | 157 int this.replacementCodepoint); |
158 | 158 |
159 Iterator<int> iterator() => this; | 159 Iterator<int> iterator() => this; |
160 | 160 |
161 bool hasNext() => utf16CodeUnitIterator.hasNext(); | 161 bool hasNext() => utf16CodeUnitIterator.hasNext(); |
162 | 162 |
163 int next() { | 163 int next() { |
164 int value = utf16CodeUnitIterator.next(); | 164 int value = utf16CodeUnitIterator.next(); |
165 if (value < 0) { | 165 if (value < 0) { |
166 if (replacementCodepoint != null) { | 166 if (replacementCodepoint != null) { |
167 return replacementCodepoint; | 167 return replacementCodepoint; |
168 } else { | 168 } else { |
169 throw new IllegalArgumentException( | 169 throw new ArgumentError( |
170 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | 170 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); |
171 } | 171 } |
172 } else if (value < UNICODE_UTF16_RESERVED_LO || | 172 } else if (value < UNICODE_UTF16_RESERVED_LO || |
173 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { | 173 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { |
174 // transfer directly | 174 // transfer directly |
175 return value; | 175 return value; |
176 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE && | 176 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE && |
177 utf16CodeUnitIterator.hasNext()) { | 177 utf16CodeUnitIterator.hasNext()) { |
178 // merge surrogate pair | 178 // merge surrogate pair |
179 int nextValue = utf16CodeUnitIterator.next(); | 179 int nextValue = utf16CodeUnitIterator.next(); |
180 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE && | 180 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE && |
181 nextValue <= UNICODE_UTF16_RESERVED_HI) { | 181 nextValue <= UNICODE_UTF16_RESERVED_HI) { |
182 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10; | 182 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10; |
183 value += UNICODE_UTF16_OFFSET + | 183 value += UNICODE_UTF16_OFFSET + |
184 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE); | 184 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE); |
185 return value; | 185 return value; |
186 } else { | 186 } else { |
187 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE && | 187 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE && |
188 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) { | 188 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) { |
189 utf16CodeUnitIterator.backup(); | 189 utf16CodeUnitIterator.backup(); |
190 } | 190 } |
191 if (replacementCodepoint != null) { | 191 if (replacementCodepoint != null) { |
192 return replacementCodepoint; | 192 return replacementCodepoint; |
193 } else { | 193 } else { |
194 throw new IllegalArgumentException( | 194 throw new ArgumentError( |
195 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | 195 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); |
196 } | 196 } |
197 } | 197 } |
198 } else if (replacementCodepoint != null) { | 198 } else if (replacementCodepoint != null) { |
199 return replacementCodepoint; | 199 return replacementCodepoint; |
200 } else { | 200 } else { |
201 throw new IllegalArgumentException( | 201 throw new ArgumentError( |
202 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | 202 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); |
203 } | 203 } |
204 } | 204 } |
205 } | 205 } |
206 | 206 |
207 /** | 207 /** |
208 * _ListRange is an internal type used to create a lightweight Iterable on a | 208 * _ListRange is an internal type used to create a lightweight Iterable on a |
209 * range within a source list. DO NOT MODIFY the underlying list while | 209 * range within a source list. DO NOT MODIFY the underlying list while |
210 * iterating over it. The results of doing so are undefined. | 210 * iterating over it. The results of doing so are undefined. |
211 */ | 211 */ |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
266 _offset -= by; | 266 _offset -= by; |
267 } | 267 } |
268 | 268 |
269 int get remaining => _end - _offset; | 269 int get remaining => _end - _offset; |
270 | 270 |
271 void skip([int count = 1]) { | 271 void skip([int count = 1]) { |
272 _offset += count; | 272 _offset += count; |
273 } | 273 } |
274 } | 274 } |
275 | 275 |
OLD | NEW |