lib/utf/utf8.dart - Issue 10989013: Change IllegalArgumentException to ArgumentError.

Side by Side Diff: lib/utf/utf8.dart

Issue 10989013: Change IllegalArgumentException to ArgumentError. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Updated co19 test expectations. Created 8 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« lib/core/errors.dart ('K') | « lib/utf/utf32.dart ('k') | lib/utf/utf_core.dart » ('j') | tests/co19/co19-dart2js.status » ('J')

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 const int _UTF8_ONE_BYTE_MAX = 0x7f;	5 const int _UTF8_ONE_BYTE_MAX = 0x7f;

6 const int _UTF8_TWO_BYTE_MAX = 0x7ff;	6 const int _UTF8_TWO_BYTE_MAX = 0x7ff;

7 const int _UTF8_THREE_BYTE_MAX = 0xffff;	7 const int _UTF8_THREE_BYTE_MAX = 0xffff;

8	8

9 const int _UTF8_LO_SIX_BIT_MASK = 0x3f;	9 const int _UTF8_LO_SIX_BIT_MASK = 0x3f;

10	10

11 const int _UTF8_FIRST_BYTE_OF_TWO_BASE = 0xc0;	11 const int _UTF8_FIRST_BYTE_OF_TWO_BASE = 0xc0;

12 const int _UTF8_FIRST_BYTE_OF_THREE_BASE = 0xe0;	12 const int _UTF8_FIRST_BYTE_OF_THREE_BASE = 0xe0;

13 const int _UTF8_FIRST_BYTE_OF_FOUR_BASE = 0xf0;	13 const int _UTF8_FIRST_BYTE_OF_FOUR_BASE = 0xf0;

14 const int _UTF8_FIRST_BYTE_OF_FIVE_BASE = 0xf8;	14 const int _UTF8_FIRST_BYTE_OF_FIVE_BASE = 0xf8;

15 const int _UTF8_FIRST_BYTE_OF_SIX_BASE = 0xfc;	15 const int _UTF8_FIRST_BYTE_OF_SIX_BASE = 0xfc;

16	16

17 const int _UTF8_FIRST_BYTE_OF_TWO_MASK = 0x1f;	17 const int _UTF8_FIRST_BYTE_OF_TWO_MASK = 0x1f;

18 const int _UTF8_FIRST_BYTE_OF_THREE_MASK = 0xf;	18 const int _UTF8_FIRST_BYTE_OF_THREE_MASK = 0xf;

19 const int _UTF8_FIRST_BYTE_OF_FOUR_MASK = 0x7;	19 const int _UTF8_FIRST_BYTE_OF_FOUR_MASK = 0x7;

20	20

21 const int _UTF8_FIRST_BYTE_BOUND_EXCL = 0xfe;	21 const int _UTF8_FIRST_BYTE_BOUND_EXCL = 0xfe;

22 const int _UTF8_SUBSEQUENT_BYTE_BASE = 0x80;	22 const int _UTF8_SUBSEQUENT_BYTE_BASE = 0x80;

23	23

24 /**	24 /**

25 * Decodes the UTF-8 bytes as an iterable. Thus, the consumer can only convert	25 * Decodes the UTF-8 bytes as an iterable. Thus, the consumer can only convert

26 * as much of the input as needed. Set the replacementCharacter to null to	26 * as much of the input as needed. Set the replacementCharacter to null to

27 * throw an IllegalArgumentException rather than replace the bad value.	27 * throw an ArgumentError rather than replace the bad value.

28 */	28 */

29 IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes, [int offset = 0,	29 IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes, [int offset = 0,

30 int length,	30 int length,

31 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	31 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

32 return new IterableUtf8Decoder(bytes, offset, length, replacementCodepoint);	32 return new IterableUtf8Decoder(bytes, offset, length, replacementCodepoint);

33 }	33 }

34	34

35 /**	35 /**

36 * Produce a String from a List of UTF-8 encoded bytes. The parameters	36 * Produce a String from a List of UTF-8 encoded bytes. The parameters

37 * can set an offset into a list of bytes (as int), limit the length of the	37 * can set an offset into a list of bytes (as int), limit the length of the

38 * values to be decoded, and override the default Unicode replacement character.	38 * values to be decoded, and override the default Unicode replacement character.

39 * Set the replacementCharacter to null to throw an IllegalArgumentException	39 * Set the replacementCharacter to null to throw an ArgumentError

40 * rather than replace the bad value.	40 * rather than replace the bad value.

41 */	41 */

42 String decodeUtf8(List<int> bytes, [int offset = 0, int length,	42 String decodeUtf8(List<int> bytes, [int offset = 0, int length,

43 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	43 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

44 return codepointsToString(	44 return codepointsToString(

45 (new Utf8Decoder(bytes, offset, length, replacementCodepoint))	45 (new Utf8Decoder(bytes, offset, length, replacementCodepoint))

46 .decodeRest());	46 .decodeRest());

47 }	47 }

48	48

49 /**	49 /**

(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
139	139

140 Utf8Decoder iterator() => new Utf8Decoder(bytes, offset, length,	140 Utf8Decoder iterator() => new Utf8Decoder(bytes, offset, length,

141 replacementCodepoint);	141 replacementCodepoint);

142 }	142 }

143	143

144 /**	144 /**

145 * Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The	145 * Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The

146 * parameters can set an offset into a list of bytes (as int), limit the length	146 * parameters can set an offset into a list of bytes (as int), limit the length

147 * of the values to be decoded, and override the default Unicode replacement	147 * of the values to be decoded, and override the default Unicode replacement

148 * character. Set the replacementCharacter to null to throw an	148 * character. Set the replacementCharacter to null to throw an

149 * IllegalArgumentException rather than replace the bad value. The return value	149 * ArgumentError rather than replace the bad value. The return value

150 * from this method can be used as an Iterable (e.g. in a for-loop).	150 * from this method can be used as an Iterable (e.g. in a for-loop).

151 */	151 */

152 class Utf8Decoder implements Iterator<int> {	152 class Utf8Decoder implements Iterator<int> {

153 final _ListRangeIterator utf8EncodedBytesIterator;	153 final _ListRangeIterator utf8EncodedBytesIterator;

154 final int replacementCodepoint;	154 final int replacementCodepoint;

155	155

156 Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length,	156 Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length,

157 int this.replacementCodepoint =	157 int this.replacementCodepoint =

158 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	158 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :

159 utf8EncodedBytesIterator = (new _ListRange(utf8EncodedBytes, offset,	159 utf8EncodedBytesIterator = (new _ListRange(utf8EncodedBytes, offset,

(...skipping 26 matching lines...) Expand all Loading...
186 bool hasNext() => utf8EncodedBytesIterator.hasNext();	186 bool hasNext() => utf8EncodedBytesIterator.hasNext();

187	187

188 int next() {	188 int next() {

189 int value = utf8EncodedBytesIterator.next();	189 int value = utf8EncodedBytesIterator.next();

190 int additionalBytes = 0;	190 int additionalBytes = 0;

191	191

192 if (value < 0) {	192 if (value < 0) {

193 if (replacementCodepoint != null) {	193 if (replacementCodepoint != null) {

194 return replacementCodepoint;	194 return replacementCodepoint;

195 } else {	195 } else {

196 throw new IllegalArgumentException(	196 throw new ArgumentError(

197 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");	197 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");

198 }	198 }

199 } else if (value <= _UTF8_ONE_BYTE_MAX) {	199 } else if (value <= _UTF8_ONE_BYTE_MAX) {

200 return value;	200 return value;

201 } else if (value < _UTF8_FIRST_BYTE_OF_TWO_BASE) {	201 } else if (value < _UTF8_FIRST_BYTE_OF_TWO_BASE) {

202 if (replacementCodepoint != null) {	202 if (replacementCodepoint != null) {

203 return replacementCodepoint;	203 return replacementCodepoint;

204 } else {	204 } else {

205 throw new IllegalArgumentException(	205 throw new ArgumentError(

206 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");	206 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");

207 }	207 }

208 } else if (value < _UTF8_FIRST_BYTE_OF_THREE_BASE) {	208 } else if (value < _UTF8_FIRST_BYTE_OF_THREE_BASE) {

209 value -= _UTF8_FIRST_BYTE_OF_TWO_BASE;	209 value -= _UTF8_FIRST_BYTE_OF_TWO_BASE;

210 additionalBytes = 1;	210 additionalBytes = 1;

211 } else if (value < _UTF8_FIRST_BYTE_OF_FOUR_BASE) {	211 } else if (value < _UTF8_FIRST_BYTE_OF_FOUR_BASE) {

212 value -= _UTF8_FIRST_BYTE_OF_THREE_BASE;	212 value -= _UTF8_FIRST_BYTE_OF_THREE_BASE;

213 additionalBytes = 2;	213 additionalBytes = 2;

214 } else if (value < _UTF8_FIRST_BYTE_OF_FIVE_BASE) {	214 } else if (value < _UTF8_FIRST_BYTE_OF_FIVE_BASE) {

215 value -= _UTF8_FIRST_BYTE_OF_FOUR_BASE;	215 value -= _UTF8_FIRST_BYTE_OF_FOUR_BASE;

216 additionalBytes = 3;	216 additionalBytes = 3;

217 } else if (value < _UTF8_FIRST_BYTE_OF_SIX_BASE) {	217 } else if (value < _UTF8_FIRST_BYTE_OF_SIX_BASE) {

218 value -= _UTF8_FIRST_BYTE_OF_FIVE_BASE;	218 value -= _UTF8_FIRST_BYTE_OF_FIVE_BASE;

219 additionalBytes = 4;	219 additionalBytes = 4;

220 } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) {	220 } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) {

221 value -= _UTF8_FIRST_BYTE_OF_SIX_BASE;	221 value -= _UTF8_FIRST_BYTE_OF_SIX_BASE;

222 additionalBytes = 5;	222 additionalBytes = 5;

223 } else if (replacementCodepoint != null) {	223 } else if (replacementCodepoint != null) {

224 return replacementCodepoint;	224 return replacementCodepoint;

225 } else {	225 } else {

226 throw new IllegalArgumentException(	226 throw new ArgumentError(

227 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");	227 "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");

228 }	228 }

229 int j = 0;	229 int j = 0;

230 while (j < additionalBytes && utf8EncodedBytesIterator.hasNext()) {	230 while (j < additionalBytes && utf8EncodedBytesIterator.hasNext()) {

231 int nextValue = utf8EncodedBytesIterator.next();	231 int nextValue = utf8EncodedBytesIterator.next();

232 if (nextValue > _UTF8_ONE_BYTE_MAX &&	232 if (nextValue > _UTF8_ONE_BYTE_MAX &&

233 nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE) {	233 nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE) {

234 value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK));	234 value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK));

235 } else {	235 } else {

236 // if sequence-starting code unit, reposition cursor to start here	236 // if sequence-starting code unit, reposition cursor to start here

(...skipping 10 matching lines...) Expand all Loading...
247 bool nonOverlong =	247 bool nonOverlong =

248 (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||	248 (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||

249 (additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) ||	249 (additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) ||

250 (additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX);	250 (additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX);

251 bool inRange = value <= UNICODE_VALID_RANGE_MAX;	251 bool inRange = value <= UNICODE_VALID_RANGE_MAX;

252 if (validSequence && nonOverlong && inRange) {	252 if (validSequence && nonOverlong && inRange) {

253 return value;	253 return value;

254 } else if (replacementCodepoint != null) {	254 } else if (replacementCodepoint != null) {

255 return replacementCodepoint;	255 return replacementCodepoint;

256 } else {	256 } else {

257 throw new IllegalArgumentException(	257 throw new ArgumentError(

258 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}");	258 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}");

259 }	259 }

260 }	260 }

261 }	261 }

OLD	NEW