OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 const int _UTF8_ONE_BYTE_MAX = 0x7f; | 5 const int _UTF8_ONE_BYTE_MAX = 0x7f; |
6 const int _UTF8_TWO_BYTE_MAX = 0x7ff; | 6 const int _UTF8_TWO_BYTE_MAX = 0x7ff; |
7 const int _UTF8_THREE_BYTE_MAX = 0xffff; | 7 const int _UTF8_THREE_BYTE_MAX = 0xffff; |
8 | 8 |
9 const int _UTF8_LO_SIX_BIT_MASK = 0x3f; | 9 const int _UTF8_LO_SIX_BIT_MASK = 0x3f; |
10 | 10 |
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
163 this.replacementCodepoint = | 163 this.replacementCodepoint = |
164 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 164 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
165 utf8EncodedBytesIterator = source.iterator(); | 165 utf8EncodedBytesIterator = source.iterator(); |
166 | 166 |
167 /** Decode the remainder of the characters in this decoder | 167 /** Decode the remainder of the characters in this decoder |
168 * into a [List<int>]. | 168 * into a [List<int>]. |
169 */ | 169 */ |
170 List<int> decodeRest() { | 170 List<int> decodeRest() { |
171 List<int> codepoints = new List<int>(utf8EncodedBytesIterator.remaining); | 171 List<int> codepoints = new List<int>(utf8EncodedBytesIterator.remaining); |
172 int i = 0; | 172 int i = 0; |
173 while (hasNext()) { | 173 while (hasNext) { |
174 codepoints[i++] = next(); | 174 codepoints[i++] = next(); |
175 } | 175 } |
176 if (i == codepoints.length) { | 176 if (i == codepoints.length) { |
177 return codepoints; | 177 return codepoints; |
178 } else { | 178 } else { |
179 List<int> truncCodepoints = new List<int>(i); | 179 List<int> truncCodepoints = new List<int>(i); |
180 truncCodepoints.setRange(0, i, codepoints); | 180 truncCodepoints.setRange(0, i, codepoints); |
181 return truncCodepoints; | 181 return truncCodepoints; |
182 } | 182 } |
183 } | 183 } |
184 | 184 |
185 bool hasNext() => utf8EncodedBytesIterator.hasNext(); | 185 bool get hasNext => utf8EncodedBytesIterator.hasNext; |
186 | 186 |
187 int next() { | 187 int next() { |
188 int value = utf8EncodedBytesIterator.next(); | 188 int value = utf8EncodedBytesIterator.next(); |
189 int additionalBytes = 0; | 189 int additionalBytes = 0; |
190 | 190 |
191 if (value < 0) { | 191 if (value < 0) { |
192 if (replacementCodepoint != null) { | 192 if (replacementCodepoint != null) { |
193 return replacementCodepoint; | 193 return replacementCodepoint; |
194 } else { | 194 } else { |
195 throw new ArgumentError( | 195 throw new ArgumentError( |
(...skipping 23 matching lines...) Expand all Loading... |
219 } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) { | 219 } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) { |
220 value -= _UTF8_FIRST_BYTE_OF_SIX_BASE; | 220 value -= _UTF8_FIRST_BYTE_OF_SIX_BASE; |
221 additionalBytes = 5; | 221 additionalBytes = 5; |
222 } else if (replacementCodepoint != null) { | 222 } else if (replacementCodepoint != null) { |
223 return replacementCodepoint; | 223 return replacementCodepoint; |
224 } else { | 224 } else { |
225 throw new ArgumentError( | 225 throw new ArgumentError( |
226 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}"); | 226 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}"); |
227 } | 227 } |
228 int j = 0; | 228 int j = 0; |
229 while (j < additionalBytes && utf8EncodedBytesIterator.hasNext()) { | 229 while (j < additionalBytes && utf8EncodedBytesIterator.hasNext) { |
230 int nextValue = utf8EncodedBytesIterator.next(); | 230 int nextValue = utf8EncodedBytesIterator.next(); |
231 if (nextValue > _UTF8_ONE_BYTE_MAX && | 231 if (nextValue > _UTF8_ONE_BYTE_MAX && |
232 nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE) { | 232 nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE) { |
233 value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK)); | 233 value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK)); |
234 } else { | 234 } else { |
235 // if sequence-starting code unit, reposition cursor to start here | 235 // if sequence-starting code unit, reposition cursor to start here |
236 if (nextValue >= _UTF8_FIRST_BYTE_OF_TWO_BASE) { | 236 if (nextValue >= _UTF8_FIRST_BYTE_OF_TWO_BASE) { |
237 utf8EncodedBytesIterator.backup(); | 237 utf8EncodedBytesIterator.backup(); |
238 } | 238 } |
239 break; | 239 break; |
(...skipping 11 matching lines...) Expand all Loading... |
251 if (validSequence && nonOverlong && inRange) { | 251 if (validSequence && nonOverlong && inRange) { |
252 return value; | 252 return value; |
253 } else if (replacementCodepoint != null) { | 253 } else if (replacementCodepoint != null) { |
254 return replacementCodepoint; | 254 return replacementCodepoint; |
255 } else { | 255 } else { |
256 throw new ArgumentError( | 256 throw new ArgumentError( |
257 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}"); | 257 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}"); |
258 } | 258 } |
259 } | 259 } |
260 } | 260 } |
OLD | NEW |