| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of utf; | 5 part of utf; |
| 6 | 6 |
| 7 const int _UTF8_ONE_BYTE_MAX = 0x7f; | 7 const int _UTF8_ONE_BYTE_MAX = 0x7f; |
| 8 const int _UTF8_TWO_BYTE_MAX = 0x7ff; | 8 const int _UTF8_TWO_BYTE_MAX = 0x7ff; |
| 9 const int _UTF8_THREE_BYTE_MAX = 0xffff; | 9 const int _UTF8_THREE_BYTE_MAX = 0xffff; |
| 10 | 10 |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 62 bytes--; | 62 bytes--; |
| 63 } | 63 } |
| 64 return value; | 64 return value; |
| 65 } | 65 } |
| 66 | 66 |
| 67 /** | 67 /** |
| 68 * Encode code points as UTF-8 code units. | 68 * Encode code points as UTF-8 code units. |
| 69 */ | 69 */ |
| 70 List<int> codepointsToUtf8( | 70 List<int> codepointsToUtf8( |
| 71 List<int> codepoints, [int offset = 0, int length]) { | 71 List<int> codepoints, [int offset = 0, int length]) { |
| 72 _ListRange source = new _ListRange(codepoints, offset, length); | 72 ListRange source = new ListRange(codepoints, offset, length); |
| 73 | 73 |
| 74 int encodedLength = 0; | 74 int encodedLength = 0; |
| 75 for (int value in source) { | 75 for (int value in source) { |
| 76 if (value < 0 || value > UNICODE_VALID_RANGE_MAX) { | 76 if (value < 0 || value > UNICODE_VALID_RANGE_MAX) { |
| 77 encodedLength += 3; | 77 encodedLength += 3; |
| 78 } else if (value <= _UTF8_ONE_BYTE_MAX) { | 78 } else if (value <= _UTF8_ONE_BYTE_MAX) { |
| 79 encodedLength++; | 79 encodedLength++; |
| 80 } else if (value <= _UTF8_TWO_BYTE_MAX) { | 80 } else if (value <= _UTF8_TWO_BYTE_MAX) { |
| 81 encodedLength += 2; | 81 encodedLength += 2; |
| 82 } else if (value <= _UTF8_THREE_BYTE_MAX) { | 82 } else if (value <= _UTF8_THREE_BYTE_MAX) { |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 146 | 146 |
| 147 /** | 147 /** |
| 148 * Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The | 148 * Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The |
| 149 * parameters can set an offset into a list of bytes (as int), limit the length | 149 * parameters can set an offset into a list of bytes (as int), limit the length |
| 150 * of the values to be decoded, and override the default Unicode replacement | 150 * of the values to be decoded, and override the default Unicode replacement |
| 151 * character. Set the replacementCharacter to null to throw an | 151 * character. Set the replacementCharacter to null to throw an |
| 152 * ArgumentError rather than replace the bad value. An instance of | 152 * ArgumentError rather than replace the bad value. An instance of |
| 153 * this class is itself an Iterator<int> over the decoded codepoints. | 153 * this class is itself an Iterator<int> over the decoded codepoints. |
| 154 */ | 154 */ |
| 155 class Utf8Decoder implements Iterator<int> { | 155 class Utf8Decoder implements Iterator<int> { |
| 156 final _ListRangeIterator utf8EncodedBytesIterator; | 156 // TODO(kevmoo): should this field be private? |
| 157 final ListRangeIterator utf8EncodedBytesIterator; |
| 157 final int replacementCodepoint; | 158 final int replacementCodepoint; |
| 158 int _current = null; | 159 int _current = null; |
| 159 | 160 |
| 160 Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length, | 161 Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length, |
| 161 this.replacementCodepoint = | 162 this.replacementCodepoint = |
| 162 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 163 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 163 utf8EncodedBytesIterator = | 164 utf8EncodedBytesIterator = |
| 164 (new _ListRange(utf8EncodedBytes, offset, length)).iterator; | 165 (new ListRange(utf8EncodedBytes, offset, length)).iterator; |
| 165 | 166 |
| 166 | 167 |
| 167 Utf8Decoder._fromListRangeIterator(_ListRange source, [ | 168 Utf8Decoder._fromListRangeIterator(ListRange source, [ |
| 168 this.replacementCodepoint = | 169 this.replacementCodepoint = |
| 169 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 170 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 170 utf8EncodedBytesIterator = source.iterator; | 171 utf8EncodedBytesIterator = source.iterator; |
| 171 | 172 |
| 172 /** Decode the remainder of the characters in this decoder | 173 /** Decode the remainder of the characters in this decoder |
| 173 * into a [List<int>]. | 174 * into a [List<int>]. |
| 174 */ | 175 */ |
| 175 List<int> decodeRest() { | 176 List<int> decodeRest() { |
| 176 List<int> codepoints = new List<int>(utf8EncodedBytesIterator.remaining); | 177 List<int> codepoints = new List<int>(utf8EncodedBytesIterator.remaining); |
| 177 int i = 0; | 178 int i = 0; |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 266 return true; | 267 return true; |
| 267 } else if (replacementCodepoint != null) { | 268 } else if (replacementCodepoint != null) { |
| 268 _current = replacementCodepoint; | 269 _current = replacementCodepoint; |
| 269 return true; | 270 return true; |
| 270 } else { | 271 } else { |
| 271 throw new ArgumentError( | 272 throw new ArgumentError( |
| 272 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}"); | 273 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}"); |
| 273 } | 274 } |
| 274 } | 275 } |
| 275 } | 276 } |
| OLD | NEW |