Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(90)

Side by Side Diff: lib/utf/utf8.dart

Issue 10989013: Change IllegalArgumentException to ArgumentError. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Updated co19 test expectations. Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 const int _UTF8_ONE_BYTE_MAX = 0x7f; 5 const int _UTF8_ONE_BYTE_MAX = 0x7f;
6 const int _UTF8_TWO_BYTE_MAX = 0x7ff; 6 const int _UTF8_TWO_BYTE_MAX = 0x7ff;
7 const int _UTF8_THREE_BYTE_MAX = 0xffff; 7 const int _UTF8_THREE_BYTE_MAX = 0xffff;
8 8
9 const int _UTF8_LO_SIX_BIT_MASK = 0x3f; 9 const int _UTF8_LO_SIX_BIT_MASK = 0x3f;
10 10
11 const int _UTF8_FIRST_BYTE_OF_TWO_BASE = 0xc0; 11 const int _UTF8_FIRST_BYTE_OF_TWO_BASE = 0xc0;
12 const int _UTF8_FIRST_BYTE_OF_THREE_BASE = 0xe0; 12 const int _UTF8_FIRST_BYTE_OF_THREE_BASE = 0xe0;
13 const int _UTF8_FIRST_BYTE_OF_FOUR_BASE = 0xf0; 13 const int _UTF8_FIRST_BYTE_OF_FOUR_BASE = 0xf0;
14 const int _UTF8_FIRST_BYTE_OF_FIVE_BASE = 0xf8; 14 const int _UTF8_FIRST_BYTE_OF_FIVE_BASE = 0xf8;
15 const int _UTF8_FIRST_BYTE_OF_SIX_BASE = 0xfc; 15 const int _UTF8_FIRST_BYTE_OF_SIX_BASE = 0xfc;
16 16
17 const int _UTF8_FIRST_BYTE_OF_TWO_MASK = 0x1f; 17 const int _UTF8_FIRST_BYTE_OF_TWO_MASK = 0x1f;
18 const int _UTF8_FIRST_BYTE_OF_THREE_MASK = 0xf; 18 const int _UTF8_FIRST_BYTE_OF_THREE_MASK = 0xf;
19 const int _UTF8_FIRST_BYTE_OF_FOUR_MASK = 0x7; 19 const int _UTF8_FIRST_BYTE_OF_FOUR_MASK = 0x7;
20 20
21 const int _UTF8_FIRST_BYTE_BOUND_EXCL = 0xfe; 21 const int _UTF8_FIRST_BYTE_BOUND_EXCL = 0xfe;
22 const int _UTF8_SUBSEQUENT_BYTE_BASE = 0x80; 22 const int _UTF8_SUBSEQUENT_BYTE_BASE = 0x80;
23 23
24 /** 24 /**
25 * Decodes the UTF-8 bytes as an iterable. Thus, the consumer can convert only 25 * Decodes the UTF-8 bytes as an iterable. Thus, the consumer can convert only
26 * as much of the input as needed. Set the replacementCodepoint to null to 26 * as much of the input as needed. Set the replacementCodepoint to null to
27 * throw an IllegalArgumentException rather than replace the bad value. 27 * throw an ArgumentError rather than replace the bad value.
28 */ 28 */
29 IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes, [int offset = 0, 29 IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes, [int offset = 0,
30 int length, 30 int length,
31 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 31 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
32 return new IterableUtf8Decoder(bytes, offset, length, replacementCodepoint); 32 return new IterableUtf8Decoder(bytes, offset, length, replacementCodepoint);
33 } 33 }
34 34
35 /** 35 /**
36 * Produce a String from a List of UTF-8 encoded bytes. The parameters 36 * Produce a String from a List of UTF-8 encoded bytes. The parameters
37 * can set an offset into a list of bytes (as int), limit the length of the 37 * can set an offset into a list of bytes (as int), limit the length of the
38 * values to be decoded, and override the default Unicode replacement character. 38 * values to be decoded, and override the default Unicode replacement character.
39 * Set the replacementCodepoint to null to throw an IllegalArgumentException 39 * Set the replacementCodepoint to null to throw an ArgumentError
40 * rather than replace the bad value. 40 * rather than replace the bad value.
41 */ 41 */
42 String decodeUtf8(List<int> bytes, [int offset = 0, int length, 42 String decodeUtf8(List<int> bytes, [int offset = 0, int length,
43 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 43 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
44 return codepointsToString( 44 return codepointsToString(
45 (new Utf8Decoder(bytes, offset, length, replacementCodepoint)) 45 (new Utf8Decoder(bytes, offset, length, replacementCodepoint))
46 .decodeRest()); 46 .decodeRest());
47 } 47 }
48 48
49 /** 49 /**
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
139 139
140 Utf8Decoder iterator() => new Utf8Decoder(bytes, offset, length, 140 Utf8Decoder iterator() => new Utf8Decoder(bytes, offset, length,
141 replacementCodepoint); 141 replacementCodepoint);
142 } 142 }
143 143
144 /** 144 /**
145 * Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The 145 * Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The
146 * parameters can set an offset into a list of bytes (as int), limit the length 146 * parameters can set an offset into a list of bytes (as int), limit the length
147 * of the values to be decoded, and override the default Unicode replacement 147 * of the values to be decoded, and override the default Unicode replacement
148 * character. Set the replacementCodepoint to null to throw an 148 * character. Set the replacementCodepoint to null to throw an
149 * IllegalArgumentException rather than replace the bad value. The return value 149 * ArgumentError rather than replace the bad value. The return value
150 * from this method can be used as an Iterable (e.g. in a for-loop). 150 * from this method can be used as an Iterable (e.g. in a for-loop).
151 */ 151 */
152 class Utf8Decoder implements Iterator<int> { 152 class Utf8Decoder implements Iterator<int> {
153 final _ListRangeIterator utf8EncodedBytesIterator; 153 final _ListRangeIterator utf8EncodedBytesIterator;
154 final int replacementCodepoint; 154 final int replacementCodepoint;
155 155
156 Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length, 156 Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length,
157 int this.replacementCodepoint = 157 int this.replacementCodepoint =
158 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 158 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
159 utf8EncodedBytesIterator = (new _ListRange(utf8EncodedBytes, offset, 159 utf8EncodedBytesIterator = (new _ListRange(utf8EncodedBytes, offset,
(...skipping 26 matching lines...) Expand all
186 bool hasNext() => utf8EncodedBytesIterator.hasNext(); 186 bool hasNext() => utf8EncodedBytesIterator.hasNext();
187 187
188 int next() { 188 int next() {
189 int value = utf8EncodedBytesIterator.next(); 189 int value = utf8EncodedBytesIterator.next();
190 int additionalBytes = 0; 190 int additionalBytes = 0;
191 191
192 if (value < 0) { 192 if (value < 0) {
193 if (replacementCodepoint != null) { 193 if (replacementCodepoint != null) {
194 return replacementCodepoint; 194 return replacementCodepoint;
195 } else { 195 } else {
196 throw new IllegalArgumentException( 196 throw new ArgumentError(
197 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}"); 197 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
198 } 198 }
199 } else if (value <= _UTF8_ONE_BYTE_MAX) { 199 } else if (value <= _UTF8_ONE_BYTE_MAX) {
200 return value; 200 return value;
201 } else if (value < _UTF8_FIRST_BYTE_OF_TWO_BASE) { 201 } else if (value < _UTF8_FIRST_BYTE_OF_TWO_BASE) {
202 if (replacementCodepoint != null) { 202 if (replacementCodepoint != null) {
203 return replacementCodepoint; 203 return replacementCodepoint;
204 } else { 204 } else {
205 throw new IllegalArgumentException( 205 throw new ArgumentError(
206 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}"); 206 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
207 } 207 }
208 } else if (value < _UTF8_FIRST_BYTE_OF_THREE_BASE) { 208 } else if (value < _UTF8_FIRST_BYTE_OF_THREE_BASE) {
209 value -= _UTF8_FIRST_BYTE_OF_TWO_BASE; 209 value -= _UTF8_FIRST_BYTE_OF_TWO_BASE;
210 additionalBytes = 1; 210 additionalBytes = 1;
211 } else if (value < _UTF8_FIRST_BYTE_OF_FOUR_BASE) { 211 } else if (value < _UTF8_FIRST_BYTE_OF_FOUR_BASE) {
212 value -= _UTF8_FIRST_BYTE_OF_THREE_BASE; 212 value -= _UTF8_FIRST_BYTE_OF_THREE_BASE;
213 additionalBytes = 2; 213 additionalBytes = 2;
214 } else if (value < _UTF8_FIRST_BYTE_OF_FIVE_BASE) { 214 } else if (value < _UTF8_FIRST_BYTE_OF_FIVE_BASE) {
215 value -= _UTF8_FIRST_BYTE_OF_FOUR_BASE; 215 value -= _UTF8_FIRST_BYTE_OF_FOUR_BASE;
216 additionalBytes = 3; 216 additionalBytes = 3;
217 } else if (value < _UTF8_FIRST_BYTE_OF_SIX_BASE) { 217 } else if (value < _UTF8_FIRST_BYTE_OF_SIX_BASE) {
218 value -= _UTF8_FIRST_BYTE_OF_FIVE_BASE; 218 value -= _UTF8_FIRST_BYTE_OF_FIVE_BASE;
219 additionalBytes = 4; 219 additionalBytes = 4;
220 } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) { 220 } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) {
221 value -= _UTF8_FIRST_BYTE_OF_SIX_BASE; 221 value -= _UTF8_FIRST_BYTE_OF_SIX_BASE;
222 additionalBytes = 5; 222 additionalBytes = 5;
223 } else if (replacementCodepoint != null) { 223 } else if (replacementCodepoint != null) {
224 return replacementCodepoint; 224 return replacementCodepoint;
225 } else { 225 } else {
226 throw new IllegalArgumentException( 226 throw new ArgumentError(
227 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}"); 227 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
228 } 228 }
229 int j = 0; 229 int j = 0;
230 while (j < additionalBytes && utf8EncodedBytesIterator.hasNext()) { 230 while (j < additionalBytes && utf8EncodedBytesIterator.hasNext()) {
231 int nextValue = utf8EncodedBytesIterator.next(); 231 int nextValue = utf8EncodedBytesIterator.next();
232 if (nextValue > _UTF8_ONE_BYTE_MAX && 232 if (nextValue > _UTF8_ONE_BYTE_MAX &&
233 nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE) { 233 nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE) {
234 value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK)); 234 value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK));
235 } else { 235 } else {
236 // if sequence-starting code unit, reposition cursor to start here 236 // if sequence-starting code unit, reposition cursor to start here
(...skipping 10 matching lines...) Expand all
247 bool nonOverlong = 247 bool nonOverlong =
248 (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) || 248 (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||
249 (additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) || 249 (additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) ||
250 (additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX); 250 (additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX);
251 bool inRange = value <= UNICODE_VALID_RANGE_MAX; 251 bool inRange = value <= UNICODE_VALID_RANGE_MAX;
252 if (validSequence && nonOverlong && inRange) { 252 if (validSequence && nonOverlong && inRange) {
253 return value; 253 return value;
254 } else if (replacementCodepoint != null) { 254 } else if (replacementCodepoint != null) {
255 return replacementCodepoint; 255 return replacementCodepoint;
256 } else { 256 } else {
257 throw new IllegalArgumentException( 257 throw new ArgumentError(
258 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}"); 258 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}");
259 } 259 }
260 } 260 }
261 } 261 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698