OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of utf; | 5 part of utf; |
6 | 6 |
7 const int _UTF8_ONE_BYTE_MAX = 0x7f; | 7 const int _UTF8_ONE_BYTE_MAX = 0x7f; |
8 const int _UTF8_TWO_BYTE_MAX = 0x7ff; | 8 const int _UTF8_TWO_BYTE_MAX = 0x7ff; |
9 const int _UTF8_THREE_BYTE_MAX = 0xffff; | 9 const int _UTF8_THREE_BYTE_MAX = 0xffff; |
10 | 10 |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
62 bytes--; | 62 bytes--; |
63 } | 63 } |
64 return value; | 64 return value; |
65 } | 65 } |
66 | 66 |
67 /** | 67 /** |
68 * Encode code points as UTF-8 code units. | 68 * Encode code points as UTF-8 code units. |
69 */ | 69 */ |
70 List<int> codepointsToUtf8( | 70 List<int> codepointsToUtf8( |
71 List<int> codepoints, [int offset = 0, int length]) { | 71 List<int> codepoints, [int offset = 0, int length]) { |
72 _ListRange source = new _ListRange(codepoints, offset, length); | 72 ListRange source = new ListRange(codepoints, offset, length); |
73 | 73 |
74 int encodedLength = 0; | 74 int encodedLength = 0; |
75 for (int value in source) { | 75 for (int value in source) { |
76 if (value < 0 || value > UNICODE_VALID_RANGE_MAX) { | 76 if (value < 0 || value > UNICODE_VALID_RANGE_MAX) { |
77 encodedLength += 3; | 77 encodedLength += 3; |
78 } else if (value <= _UTF8_ONE_BYTE_MAX) { | 78 } else if (value <= _UTF8_ONE_BYTE_MAX) { |
79 encodedLength++; | 79 encodedLength++; |
80 } else if (value <= _UTF8_TWO_BYTE_MAX) { | 80 } else if (value <= _UTF8_TWO_BYTE_MAX) { |
81 encodedLength += 2; | 81 encodedLength += 2; |
82 } else if (value <= _UTF8_THREE_BYTE_MAX) { | 82 } else if (value <= _UTF8_THREE_BYTE_MAX) { |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
146 | 146 |
147 /** | 147 /** |
148 * Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The | 148 * Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The |
149 * parameters can set an offset into a list of bytes (as int), limit the length | 149 * parameters can set an offset into a list of bytes (as int), limit the length |
150 * of the values to be decoded, and override the default Unicode replacement | 150 * of the values to be decoded, and override the default Unicode replacement |
151 * character. Set the replacementCharacter to null to throw an | 151 * character. Set the replacementCharacter to null to throw an |
152 * ArgumentError rather than replace the bad value. The return value | 152 * ArgumentError rather than replace the bad value. The return value |
153 * from this method can be used as an Iterable (e.g. in a for-loop). | 153 * from this method can be used as an Iterable (e.g. in a for-loop). |
154 */ | 154 */ |
155 class Utf8Decoder implements Iterator<int> { | 155 class Utf8Decoder implements Iterator<int> { |
156 final _ListRangeIterator utf8EncodedBytesIterator; | 156 // TODO(kevmoo): should this field be private? |
| 157 final ListRangeIterator utf8EncodedBytesIterator; |
157 final int replacementCodepoint; | 158 final int replacementCodepoint; |
158 int _current = null; | 159 int _current = null; |
159 | 160 |
160 Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length, | 161 Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length, |
161 this.replacementCodepoint = | 162 this.replacementCodepoint = |
162 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 163 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
163 utf8EncodedBytesIterator = | 164 utf8EncodedBytesIterator = |
164 (new _ListRange(utf8EncodedBytes, offset, length)).iterator; | 165 (new ListRange(utf8EncodedBytes, offset, length)).iterator; |
165 | 166 |
166 | 167 |
167 Utf8Decoder._fromListRangeIterator(_ListRange source, [ | 168 Utf8Decoder._fromListRangeIterator(ListRange source, [ |
168 this.replacementCodepoint = | 169 this.replacementCodepoint = |
169 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 170 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
170 utf8EncodedBytesIterator = source.iterator; | 171 utf8EncodedBytesIterator = source.iterator; |
171 | 172 |
172 /** Decode the remainder of the characters in this decoder | 173 /** Decode the remainder of the characters in this decoder |
173 * into a [List<int>]. | 174 * into a [List<int>]. |
174 */ | 175 */ |
175 List<int> decodeRest() { | 176 List<int> decodeRest() { |
176 List<int> codepoints = new List<int>(utf8EncodedBytesIterator.remaining); | 177 List<int> codepoints = new List<int>(utf8EncodedBytesIterator.remaining); |
177 int i = 0; | 178 int i = 0; |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
266 return true; | 267 return true; |
267 } else if (replacementCodepoint != null) { | 268 } else if (replacementCodepoint != null) { |
268 _current = replacementCodepoint; | 269 _current = replacementCodepoint; |
269 return true; | 270 return true; |
270 } else { | 271 } else { |
271 throw new ArgumentError( | 272 throw new ArgumentError( |
272 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}"); | 273 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}"); |
273 } | 274 } |
274 } | 275 } |
275 } | 276 } |
OLD | NEW |