runtime/lib/convert_patch.dart - Issue 649113005: Make JSON parsing work as a chunked conversion sink.

Side by Side Diff: runtime/lib/convert_patch.dart

Issue 649113005: Make JSON parsing work as a chunked conversion sink. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Address comments. Fix bug. Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

	5 import "dart:_internal" show POWERS_OF_TEN;

	6

5 // JSON conversion.	7 // JSON conversion.

6	8

7 patch _parseJson(String json, reviver(var key, var value)) {	9 patch _parseJson(String json, reviver(var key, var value)) {

8 _BuildJsonListener listener;	10 _BuildJsonListener listener;

9 if (reviver == null) {	11 if (reviver == null) {

10 listener = new _BuildJsonListener();	12 listener = new _BuildJsonListener();

11 } else {	13 } else {

12 listener = new _ReviverJsonListener(reviver);	14 listener = new _ReviverJsonListener(reviver);

13 }	15 }

14 new _JsonParser(json, listener).parse();	16 var parser = new _JsonStringParser(listener);

	17 parser.chunk = json;

	18 parser.chunkEnd = json.length;

	19 parser.parse(0);

	20 parser.close();

15 return listener.result;	21 return listener.result;

16 }	22 }

17	23

18 //// Implementation ///////////////////////////////////////////////////////////	24 //// Implementation ///////////////////////////////////////////////////////////

19	25

20 // Simple API for JSON parsing.	26 // Simple API for JSON parsing.

21	27

	28 /**

	29 * Listener for parsing events from [_ChunkedJsonParser].

	30 */

22 abstract class _JsonListener {	31 abstract class _JsonListener {

23 void handleString(String value) {}	32 void handleString(String value) {}

24 void handleNumber(num value) {}	33 void handleNumber(num value) {}

25 void handleBool(bool value) {}	34 void handleBool(bool value) {}

26 void handleNull() {}	35 void handleNull() {}

27 void beginObject() {}	36 void beginObject() {}

28 void propertyName() {}	37 void propertyName() {}

29 void propertyValue() {}	38 void propertyValue() {}

30 void endObject() {}	39 void endObject() {}

31 void beginArray() {}	40 void beginArray() {}

32 void arrayElement() {}	41 void arrayElement() {}

33 void endArray() {}	42 void endArray() {}

34 }	43 }

35	44

36 /**	45 /**

37 * A [JsonListener] that builds data objects from the parser events.	46 * A [_JsonListener] that builds data objects from the parser events.

38 *	47 *

39 * This is a simple stack-based object builder. It keeps the most recently	48 * This is a simple stack-based object builder. It keeps the most recently

40 * seen value in a variable, and uses it depending on the following event.	49 * seen value in a variable, and uses it depending on the following event.

41 */	50 */

42 class _BuildJsonListener extends _JsonListener {	51 class _BuildJsonListener extends _JsonListener {

43 /**	52 /**

44 * Stack used to handle nested containers.	53 * Stack used to handle nested containers.

45 *	54 *

46 * The current container is pushed on the stack when a new one is	55 * The current container is pushed on the stack when a new one is

47 * started. If the container is a [Map], there is also a current [key]	56 * started. If the container is a [Map], there is also a current [key]

(...skipping 80 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
128 void propertyValue() {	137 void propertyValue() {

129 value = reviver(key, value);	138 value = reviver(key, value);

130 super.propertyValue();	139 super.propertyValue();

131 }	140 }

132	141

133 get result {	142 get result {

134 return reviver(null, value);	143 return reviver(null, value);

135 }	144 }

136 }	145 }

137	146

138 class _JsonParser {	147 /**

	148 * Buffer holding parts of a numeral.

	149 *

	150 * The buffer contains the characters of a JSON number.

	151 * These are all ASCII, so an [Uint8List] is used as backing store.

	152 *

	153 * This buffer is used when a JSON number is split between separate chunks.

	154 *

	155 */

	156 class _NumberBuffer {

	157 static const int minCapacity = 16;

	158 static const int kDefaultOverhead = 5;

	159 Uint8List list;

	160 int length = 0;

	161 _NumberBuffer(int initialCapacity)

	162 : list = new Uint8List(_initialCapacity(initialCapacity));

	163

	164 int get capacity => list.length;

	165

	166 // Pick an initial capacity greater than the first part's size.

	167 // The typical use case has two parts, this is the attempt at

	168 // guessing the size of the second part without overdoing it.

	169 // The default estimate of the second part is [kDefaultOverhead],

	170 // then round to a multiple of four, and return the result,

	171 // or [minCapacity] if that is greater.

	172 static int _initialCapacity(int minCapacity) {

	173 minCapacity += kDefaultOverhead;

	174 if (minCapacity < minCapacity) return minCapacity;

	175 minCapacity = (minCapacity + 3) & ~3; // Round to multiple of four.

	176 return minCapacity;

	177 }

	178

	179 // Grows to the exact size asked for.

	180 void ensureCapacity(int newCapacity) {

	181 Uint8List list = this.list;

	182 if (newCapacity <= list.length) return;

	183 Uint8List newList = new Uint8List(newCapacity);

	184 newList.setRange(0, list.length, list, 0);

	185 this.list = newList;

	186 }

	187

	188 String getString() {

	189 var list = this.list;

	190 if (length < list.length) {

	191 list = new Uint8List.view(list.buffer, 0, length);

	192 }

	193 String result = new String.fromCharCodes(list);

	194 return result;

	195 }

	196

	197 // TODO(lrn): See if parsing of numbers can be abstracted to something

	198 // not only working on strings, but also on char-code lists, without losing

	199 // performance.

	200 int parseInt() => int.parse(getString());

	201 double parseDouble() => double.parse(getString());

	202 }

	203

	204 /**

	205 * Chunked JSON parser.

	206 *

	207 * Receives inputs in chunks, gives access to individual parts of the input,

	208 * and stores input state between chunks.

	209 *

	210 * Implementations include [String] and UTF-8 parsers.

	211 */

	212 abstract class _ChunkedJsonParser {

139 // A simple non-recursive state-based parser for JSON.	213 // A simple non-recursive state-based parser for JSON.

140 //	214 //

141 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON	215 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON

142 // and strings also in OBJECT_EMPTY, OBJECT_COMMA.	216 // and strings also in OBJECT_EMPTY, OBJECT_COMMA.

143 // VALUE STRING : , } ] Transitions to	217 // VALUE STRING : , } ] Transitions to

144 // EMPTY X X -> END	218 // EMPTY X X -> END

145 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop	219 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop

146 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop	220 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop

147 // ARRAY_COMMA X X -> ARRAY_VALUE	221 // ARRAY_COMMA X X -> ARRAY_VALUE

148 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop	222 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop

(...skipping 16 matching lines...) Expand all Loading...
165 static const int INSIDE_OBJECT = 2;	239 static const int INSIDE_OBJECT = 2;

166 static const int AFTER_COLON = 3; // Always inside object.	240 static const int AFTER_COLON = 3; // Always inside object.

167	241

168 static const int ALLOW_STRING_MASK = 8; // Allowed if zero.	242 static const int ALLOW_STRING_MASK = 8; // Allowed if zero.

169 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero.	243 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero.

170 static const int ALLOW_VALUE = 0;	244 static const int ALLOW_VALUE = 0;

171 static const int STRING_ONLY = 4;	245 static const int STRING_ONLY = 4;

172 static const int NO_VALUES = 12;	246 static const int NO_VALUES = 12;

173	247

174 // Objects and arrays are "empty" until their first property/element.	248 // Objects and arrays are "empty" until their first property/element.

	249 // At this position, they may either have an entry or a close-bracket.

175 static const int EMPTY = 0;	250 static const int EMPTY = 0;

176 static const int NON_EMPTY = 16;	251 static const int NON_EMPTY = 16;

177 static const int EMPTY_MASK = 16; // Empty if zero.	252 static const int EMPTY_MASK = 16; // Empty if zero.

178	253

179

180 static const int VALUE_READ_BITS = NO_VALUES \| NON_EMPTY;	254 static const int VALUE_READ_BITS = NO_VALUES \| NON_EMPTY;

181	255

182 // Actual states.	256 // Actual states.

183 static const int STATE_INITIAL = EMPTY \| ALLOW_VALUE;	257 static const int STATE_INITIAL = EMPTY \| ALLOW_VALUE;

184 static const int STATE_END = NON_EMPTY \| NO_VALUES;	258 static const int STATE_END = NON_EMPTY \| NO_VALUES;

185	259

186 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY \| EMPTY \| ALLOW_VALUE;	260 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY \| EMPTY \| ALLOW_VALUE;

187 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY \| NON_EMPTY \| NO_VALUES;	261 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY \| NON_EMPTY \| NO_VALUES;

188 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY \| NON_EMPTY \| ALLOW_VALUE;	262 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY \| NON_EMPTY \| ALLOW_VALUE;

189	263

(...skipping 29 matching lines...) Expand all Loading...
219 static const int CHAR_f = 0x66;	293 static const int CHAR_f = 0x66;

220 static const int CHAR_l = 0x6c;	294 static const int CHAR_l = 0x6c;

221 static const int CHAR_n = 0x6e;	295 static const int CHAR_n = 0x6e;

222 static const int CHAR_r = 0x72;	296 static const int CHAR_r = 0x72;

223 static const int CHAR_s = 0x73;	297 static const int CHAR_s = 0x73;

224 static const int CHAR_t = 0x74;	298 static const int CHAR_t = 0x74;

225 static const int CHAR_u = 0x75;	299 static const int CHAR_u = 0x75;

226 static const int LBRACE = 0x7b;	300 static const int LBRACE = 0x7b;

227 static const int RBRACE = 0x7d;	301 static const int RBRACE = 0x7d;

228	302

229 final String source;	303 // State of partial value at chunk split.

	304 static const int NO_PARTIAL = 0;

	305 static const int PARTIAL_STRING = 1;

	306 static const int PARTIAL_NUMERAL = 2;

	307 static const int PARTIAL_KEYWORD = 3;

	308 static const int MASK_PARTIAL = 3;

	309

	310 // Partial states for numerals. Values can be \|'ed with PARTIAL_NUMERAL.

	311 static const int NUM_SIGN = 0; // After initial '-'.

	312 static const int NUM_ZERO = 4; // After '0' as first digit.

	313 static const int NUM_DIGIT = 8; // After digit, no '.' or 'e' seen.

	314 static const int NUM_DOT = 12; // After '.'.

	315 static const int NUM_DOT_DIGIT = 16; // After a decimal digit (after '.').

	316 static const int NUM_E = 20; // After 'e' or 'E'.

	317 static const int NUM_E_SIGN = 24; // After '-' or '+' after 'e' or 'E'.

	318 static const int NUM_E_DIGIT = 28; // After exponent digit.

	319 static const int NUM_SUCCESS = 32; // Never stored as partial state.

	320

	321 // Partial states for strings.

	322 static const int STR_PLAIN = 0; // Inside string, but not escape.

	323 static const int STR_ESCAPE = 4; // After '\'.

	324 static const int STR_U = 16; // After '\u' and 0-3 hex digits.

	325 static const int STR_U_COUNT_SHIFT = 2; // Hex digit count in bits 2-3.

	326 static const int STR_U_VALUE_SHIFT = 5; // Hex digit value in bits 5+.

	327

	328 // Partial states for keywords.

	329 static const int KWD_TYPE_MASK = 12;

	330 static const int KWD_TYPE_SHIFT = 2;

	331 static const int KWD_NULL = 0; // Prefix of "null" seen.

	332 static const int KWD_TRUE = 4; // Prefix of "true" seen.

	333 static const int KWD_FALSE = 8; // Prefix of "false" seen.

	334 static const int KWD_COUNT_SHIFT = 4; // Prefix length in bits 4+.

	335

	336 // Mask used to mask off two lower bits.

	337 static const int TWO_BIT_MASK = 3;

	338

230 final _JsonListener listener;	339 final _JsonListener listener;

231 _JsonParser(this.source, this.listener);	340

232	341 // The current parsing state.

233 /** Parses [source], or throws if it fails. */	342 int state = STATE_INITIAL;

234 void parse() {	343 List<int> states = <int>[];

235 final List<int> states = <int>[];	344

236 int state = STATE_INITIAL;	345 /**

237 int position = 0;	346 * Stores tokenizer state between chunks.

238 int length = source.length;	347 *

	348 * This state is stored when a chunk stops in the middle of a

	349 * token (string, numeral, boolean or null).

	350 *

	351 * The partial state is used to continue parsing on the next chunk.

	352 * The previous chunk is not retained, any data needed are stored in

	353 * this integer, or in the [buffer] field as a string-building buffer

	354 * or a [_NumberBuffer].

	355 *

	356 * Partial state stored in [partialState] as bits.

	357 *

	358 * ..00 : No partial value (NO_PARTIAL).

	359 *

	360 * ..00001 : Partial string, not inside escape.

	361 * ..00101 : Partial string, after '\'.

	362 * ..vvvv1dd01 : Partial \u escape.

	363 * The 'dd' bits (2-3) encode the number of hex digits seen.

	364 * Bits 5-16 encode the value of the hex digits seen so far.

	365 *

	366 * ..0ddd10 : Partial numeral.

	367 * The `ddd` bits store the parts of the numeral seen so

	368 * far, as the constants `NUM_*` defined above.

	369 * The characters of the numeral are stored in [buffer]

	370 * as a [_NumberBuffer].

	371 *

	372 * ..0ddd0011 : Partial 'null' keyword.

	373 * ..0ddd0111 : Partial 'true' keyword.

	374 * ..0ddd1011 : Partial 'false' keyword.

	375 * For all three keywords, the `ddd` bits encode the number

	376 * of letters seen.

	377 */

	378 int partialState = NO_PARTIAL;

	379

	380 /**

	381 * Extra data stored while parsing a primitive value.

	382 * May be set during parsing, always set at chunk end if a value is partial.

	383 *

	384 * May contain a string buffer while parsing strings.

	385 */

	386 var buffer = null;

	387

	388 _ChunkedJsonParser(this.listener);

	389

	390 /**

	391 * Push the current parse [state] on a stack.

	392 *

	393 * State is pushed when a new array or object literal starts,

	394 * so the parser can go back to the correct value when the literal ends.

	395 */

	396 void saveState(int state) {

	397 states.add(state);

	398 }

	399

	400 /**

	401 * Restore a state pushed with [saveState].

	402 */

	403 int restoreState() {

	404 return states.removeLast(); // Throws if empty.

	405 }

	406

	407 /**

	408 * Finalizes the parsing.

	409 *

	410 * Throws if the source read so far doesn't end up with a complete

	411 * parsed value. That means it must not be inside a list or object

	412 * literal, and any partial value read should also be a valid complete

	413 * value.

	414 *

	415 * The only valid partial state is a number that ends in a digit, and

	416 * only if the number is the entire JSON value being parsed

	417 * (otherwise it would be inside a list or object).

	418 * Such a number will be completed. Any other partial state is an error.

	419 */

	420 void close() {

	421 if (partialState != NO_PARTIAL) {

	422 int partialType = partialState & MASK_PARTIAL;

	423 if (partialType == PARTIAL_NUMERAL) {

	424 int numState = partialState & ~MASK_PARTIAL;

	425 // A partial number might be a valid number if we know it's done.

	426 // There is an unnecessary overhead if input is a single number,

	427 // but this is assumed to be rare.

	428 _NumberBuffer buffer = this.buffer;

	429 this.buffer = null;

	430 finishChunkNumber(numState, 0, 0, buffer);

	431 } else if (partialType == PARTIAL_STRING) {

	432 fail(chunkEnd, "Unterminated string");

	433 } else {

	434 assert(partialType == PARTIAL_KEYWORD);

	435 fail(chunkEnd); // Incomplete literal.

	436 }

	437 }

	438 if (state != STATE_END) {

	439 fail(chunkEnd);

	440 }

	441 }

	442

	443 /**

	444 * Read out the result after successfully closing the parser.

	445 *

	446 * The parser is closed by calling [close] or calling [addSourceChunk] with

	447 * `true` as second (`isLast`) argument.

	448 */

	449 Object get result {

	450 return listener.result;

	451 }

	452

	453 /** Sets the current source chunk. */

	454 void set chunk(var source);

	455

	456 /**

	457 * Length of current chunk.

	458 *

	459 * The valid arguments to [getChar] are 0 .. `chunkEnd - 1`.

	460 */

	461 int get chunkEnd;

	462

	463 /**

	464 * Returns the chunk itself.

	465 *

	466 * Only used by [fail] to include the chunk in the thrown [FormatException].

	467 */

	468 get chunk;

	469

	470 /**

	471 * Get character/code unit of current chunk.

	472 *

	473 * The [index] must be non-negative and less than `chunkEnd`.

	474 * In practice, [index] will be no smaller than the `start` argument passed

	475 * to [parse].

	476 */

	477 int getChar(int index);

	478

	479 /**

	480 * Copy ASCII characters from start to end of chunk into a list.

	481 *

	482 * Used for number buffer (always copies ASCII, so encoding is not important).

	483 */

	484 void copyCharsToList(int start, int end, List<int> target);

	485

	486 /**

	487 * Build a string using input code units.

	488 *

	489 * Creates a string buffer and enables adding characters and slices

	490 * to that buffer.

	491 * The buffer is stored in the [buffer] field. If the string is unterminated,

	492 * the same buffer is used to continue parsing in the next chunk.

	493 */

	494 void beginString();

	495 /**

	496 * Add single character code to string being built.

	497 *

	498 * Used for unparsed escape sequences.

	499 */

	500 void addCharToString(int charCode);

	501

	502 /**

	503 * Adds slice of current chunk to string being built.

	504 *

	505 * The [start] position is inclusive, [end] is exclusive.

	506 */

	507 void addSliceToString(int start, int end);

	508

	509 /** Finalizes the string being built and returns it as a String. */

	510 String endString();

	511

	512 /**

	513 * Extracts a literal string from a slice of the current chunk.

	514 *

	515 * No interpretation of the content is performed, except for converting

	516 * the source format to string.

	517 * This can be implemented more or less efficiently depending on the

	518 * underlying source.

	519 *

	520 * This is used for string literals that contain no escapes.

	521 */

	522 String getString(int start, int end);

	523

	524 /**

	525 * Parse a slice of the current chunk as an integer.

	526 *

	527 * The format is expected to be correct.

	528 */

	529 int parseInt(int start, int end) {

	530 return int.parse(getString(start, end));

	531 }

	532

	533 /**

	534 * Parse a slice of the current chunk as a double.

	535 *

	536 * The format is expected to be correct.

	537 * This is used by [parseNumber] when the double value cannot be

	538 * built exactly during parsing.

	539 */

	540 double parseDouble(int start, int end) {

	541 return double.parse(getString(start, end));

	542 }

	543

	544 /**

	545 * Create a _NumberBuffer containing the digits from [start] to [chunkEnd].

	546 *

	547 * This creates a number buffer and initializes it with the part of the

	548 * number literal ending the current chunk.

	549 */

	550 void createNumberBuffer(int start) {

	551 assert(start >= 0);

	552 assert(start < chunkEnd);

	553 int length = chunkEnd - start;

	554 var buffer = new _NumberBuffer(length);

	555 copyCharsToList(start, chunkEnd, buffer.list);

	556 buffer.length = length;

	557 return buffer;

	558 }

	559

	560 /**

	561 * Continues parsing a partial value.

	562 */

	563 int parsePartial(int position) {

	564 if (position == chunkEnd) return position;

	565 int partialState = this.partialState;

	566 assert(partialState != NO_PARTIAL);

	567 int partialType = partialState & MASK_PARTIAL;

	568 this.partialState = NO_PARTIAL;

	569 partialState = partialState & ~MASK_PARTIAL;

	570 assert(partialType != 0);

	571 if (partialType == PARTIAL_STRING) {

	572 position = parsePartialString(position, partialState);

	573 } else if (partialType == PARTIAL_NUMERAL) {

	574 position = parsePartialNumber(position, partialState);

	575 } else if (partialType == PARTIAL_KEYWORD) {

	576 position = parsePartialKeyword(position, partialState);

	577 }

	578 return position;

	579 }

	580

	581 /**

	582 * Parses the remainder of a number into the number buffer.

	583 *

	584 * Syntax is checked while parsing.

	585 * Starts at position, which is expected to be the start of the chunk,

	586 * and returns the index of the first non-number-literal character found,

	587 * or chunkEnd if the entire chunk is a valid number continuation.

	588 * Throws if a syntax error is detected.

	589 */

	590 int parsePartialNumber(int position, int state) {

	591 int start = position;

	592 // Primitive implementation, can be optimized.

	593 _NumberBuffer buffer = this.buffer;

	594 this.buffer = null;

	595 int end = chunkEnd;

	596 toBailout: {

	597 if (position == end) break toBailout;

	598 int char = getChar(position);

	599 int digit = char ^ CHAR_0;

	600 if (state == NUM_SIGN) {

	601 if (digit <= 9) {

	602 if (digit == 0) {

	603 state = NUM_ZERO;

	604 } else {

	605 state = NUM_DIGIT;

	606 }

	607 position++;

	608 if (position == end) break toBailout;

	609 char = getChar(position);

	610 digit = char ^ CHAR_0;

	611 } else {

	612 return fail(position);

	613 }

	614 }

	615 if (state == NUM_ZERO) {

	616 // JSON does not allow insignificant leading zeros (e.g., "09").

	617 if (digit <= 9) return fail(position);

	618 state = NUM_DIGIT;

	619 }

	620 while (state == NUM_DIGIT) {

	621 if (digit > 9) {

	622 if (char == DECIMALPOINT) {

	623 state = NUM_DOT;

	624 } else if ((char \| 0x20) == CHAR_e) {

	625 state = NUM_E;

	626 } else {

	627 finishChunkNumber(state, start, position, buffer);

	628 return position;

	629 }

	630 }

	631 position++;

	632 if (position == end) break toBailout;

	633 char = getChar(position);

	634 digit = char ^ CHAR_0;

	635 }

	636 if (state == NUM_DOT) {

	637 if (digit > 9) return fail(position);

	638 state = NUM_DOT_DIGIT;

	639 }

	640 while (state == NUM_DOT_DIGIT) {

	641 if (digit > 9) {

	642 if ((char \| 0x20) == CHAR_e) {

	643 state = NUM_E;

	644 } else {

	645 finishChunkNumber(state, start, position, buffer);

	646 return position;

	647 }

	648 }

	649 position++;

	650 if (position == end) break toBailout;

	651 char = getChar(position);

	652 digit = char ^ CHAR_0;

	653 }

	654 if (state == NUM_E) {

	655 if (char == PLUS \|\| char == MINUS) {

	656 state = NUM_E_SIGN;

	657 position++;

	658 if (position == end) break toBailout;

	659 char = getChar(position);

	660 digit = char ^ CHAR_0;

	661 }

	662 }

	663 assert(state >= NUM_E);

	664 while (digit <= 9) {

	665 state = NUM_E_DIGIT;

	666 position++;

	667 if (position == end) break toBailout;

	668 char = getChar(position);

	669 digit = char ^ CHAR_0;

	670 }

	671 finishChunkNumber(state, start, position, buffer);

	672 return position;

	673 }

	674 // Bailout code in case the current chunk ends while parsing the numeral.

	675 assert(position == end);

	676 continueChunkNumber(state, start, buffer);

	677 return chunkEnd;

	678 }

	679

	680 /**

	681 * Continues parsing a partial string literal.

	682 *

	683 * Handles partial escapes and then hands the parsing off to

	684 * [parseStringToBuffer].

	685 */

	686 int parsePartialString(int position, int partialState) {

	687 if (partialState == STR_PLAIN) {

	688 return parseStringToBuffer(position);

	689 }

	690 if (partialState == STR_ESCAPE) {

	691 position = parseStringEscape(position);

	692 // parseStringEscape sets partialState if it sees the end.

	693 if (position == chunkEnd) return position;

	694 return parseStringToBuffer(position);

	695 }

	696 assert((partialState & STR_U) != 0);

	697 int value = partialState >> STR_U_VALUE_SHIFT;

	698 int count = (partialState >> STR_U_COUNT_SHIFT) & TWO_BIT_MASK;

	699 for (int i = count; i < 4; i++, position++) {

	700 if (position == chunkEnd) return chunkStringEscapeU(i, value);

	701 int char = getChar(position);

	702 int digit = parseHexDigit(char);

	703 if (digit < 0) fail(position, "Invalid hex digit");

	704 value = 16 * value + digit;

	705 }

	706 addCharToString(value);

	707 return parseStringToBuffer(position);

	708 }

	709

	710 /**

	711 * Continues parsing a partial keyword.

	712 */

	713 int parsePartialKeyword(int position, int partialState) {

	714 int keywordType = partialState & KWD_TYPE_MASK;

	715 int count = partialState >> KWD_COUNT_SHIFT;

	716 int keywordTypeIndex = keywordType >> KWD_TYPE_SHIFT;

	717 String keyword = const ["null", "true", "false"][keywordTypeIndex];

	718 assert(count < keyword.length);

	719 do {

	720 if (position == chunkEnd) {

	721 this.partialState =

	722 PARTIAL_KEYWORD \| keywordType \| (count << KWD_COUNT_SHIFT);

	723 return chunkEnd;

	724 }

	725 int expectedChar = keyword.codeUnitAt(count);

	726 if (getChar(position) != expectedChar) return fail(position);

	727 position++;

	728 count++;

	729 } while (count < keyword.length);

	730 if (keywordType == KWD_NULL) {

	731 listener.handleNull();

	732 } else {

	733 listener.handleBool(keywordType == KWD_TRUE);

	734 }

	735 return position;

	736 }

	737

	738 /** Convert hex-digit to its value. Returns -1 if char is not a hex digit. */

	739 int parseHexDigit(int char) {

	740 int digit = char ^ 0x30;

	741 if (digit <= 9) return digit;

	742 int letter = (char \| 0x20) ^ 0x60;

	743 // values 1 .. 6 are 'a' through 'f'

	744 if (letter <= 6 && letter > 0) return letter + 9;

	745 return -1;

	746 }

	747

	748 /**

	749 * Parses the current chunk as a chunk of JSON.

	750 *

	751 * Starts parsing at [position] and continues until [chunkEnd].

	752 * Continues parsing where the previous chunk (if any) ended.

	753 */

	754 void parse(int position) {

	755 int length = chunkEnd;

	756 if (partialState != NO_PARTIAL) {

	757 position = parsePartial(position);

	758 if (position == length) return;

	759 }

	760 int state = this.state;

239 while (position < length) {	761 while (position < length) {

240 int char = source.codeUnitAt(position);	762 int char = getChar(position);

241 switch (char) {	763 switch (char) {

242 case SPACE:	764 case SPACE:

243 case CARRIAGE_RETURN:	765 case CARRIAGE_RETURN:

244 case NEWLINE:	766 case NEWLINE:

245 case TAB:	767 case TAB:

246 position++;	768 position++;

247 break;	769 break;

248 case QUOTE:	770 case QUOTE:

249 if ((state & ALLOW_STRING_MASK) != 0) fail(position);	771 if ((state & ALLOW_STRING_MASK) != 0) return fail(position);

	772 state \|= VALUE_READ_BITS;

250 position = parseString(position + 1);	773 position = parseString(position + 1);

251 state \|= VALUE_READ_BITS;

252 break;	774 break;

253 case LBRACKET:	775 case LBRACKET:

254 if ((state & ALLOW_VALUE_MASK) != 0) fail(position);	776 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

255 listener.beginArray();	777 listener.beginArray();

256 states.add(state);	778 saveState(state);

257 state = STATE_ARRAY_EMPTY;	779 state = STATE_ARRAY_EMPTY;

258 position++;	780 position++;

259 break;	781 break;

260 case LBRACE:	782 case LBRACE:

261 if ((state & ALLOW_VALUE_MASK) != 0) fail(position);	783 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

262 listener.beginObject();	784 listener.beginObject();

263 states.add(state);	785 saveState(state);

264 state = STATE_OBJECT_EMPTY;	786 state = STATE_OBJECT_EMPTY;

265 position++;	787 position++;

266 break;	788 break;

267 case CHAR_n:	789 case CHAR_n:

268 if ((state & ALLOW_VALUE_MASK) != 0) fail(position);	790 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

	791 state \|= VALUE_READ_BITS;

269 position = parseNull(position);	792 position = parseNull(position);

270 state \|= VALUE_READ_BITS;

271 break;	793 break;

272 case CHAR_f:	794 case CHAR_f:

273 if ((state & ALLOW_VALUE_MASK) != 0) fail(position);	795 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

	796 state \|= VALUE_READ_BITS;

274 position = parseFalse(position);	797 position = parseFalse(position);

275 state \|= VALUE_READ_BITS;

276 break;	798 break;

277 case CHAR_t:	799 case CHAR_t:

278 if ((state & ALLOW_VALUE_MASK) != 0) fail(position);	800 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

	801 state \|= VALUE_READ_BITS;

279 position = parseTrue(position);	802 position = parseTrue(position);

280 state \|= VALUE_READ_BITS;

281 break;	803 break;

282 case COLON:	804 case COLON:

283 if (state != STATE_OBJECT_KEY) fail(position);	805 if (state != STATE_OBJECT_KEY) return fail(position);

284 listener.propertyName();	806 listener.propertyName();

285 state = STATE_OBJECT_COLON;	807 state = STATE_OBJECT_COLON;

286 position++;	808 position++;

287 break;	809 break;

288 case COMMA:	810 case COMMA:

289 if (state == STATE_OBJECT_VALUE) {	811 if (state == STATE_OBJECT_VALUE) {

290 listener.propertyValue();	812 listener.propertyValue();

291 state = STATE_OBJECT_COMMA;	813 state = STATE_OBJECT_COMMA;

292 position++;	814 position++;

293 } else if (state == STATE_ARRAY_VALUE) {	815 } else if (state == STATE_ARRAY_VALUE) {

294 listener.arrayElement();	816 listener.arrayElement();

295 state = STATE_ARRAY_COMMA;	817 state = STATE_ARRAY_COMMA;

296 position++;	818 position++;

297 } else {	819 } else {

298 fail(position);	820 return fail(position);

299 }	821 }

300 break;	822 break;

301 case RBRACKET:	823 case RBRACKET:

302 if (state == STATE_ARRAY_EMPTY) {	824 if (state == STATE_ARRAY_EMPTY) {

303 listener.endArray();	825 listener.endArray();

304 } else if (state == STATE_ARRAY_VALUE) {	826 } else if (state == STATE_ARRAY_VALUE) {

305 listener.arrayElement();	827 listener.arrayElement();

306 listener.endArray();	828 listener.endArray();

307 } else {	829 } else {

308 fail(position);	830 return fail(position);

309 }	831 }

310 state = states.removeLast() \| VALUE_READ_BITS;	832 state = restoreState() \| VALUE_READ_BITS;

311 position++;	833 position++;

312 break;	834 break;

313 case RBRACE:	835 case RBRACE:

314 if (state == STATE_OBJECT_EMPTY) {	836 if (state == STATE_OBJECT_EMPTY) {

315 listener.endObject();	837 listener.endObject();

316 } else if (state == STATE_OBJECT_VALUE) {	838 } else if (state == STATE_OBJECT_VALUE) {

317 listener.propertyValue();	839 listener.propertyValue();

318 listener.endObject();	840 listener.endObject();

319 } else {	841 } else {

320 fail(position);	842 return fail(position);

321 }	843 }

322 state = states.removeLast() \| VALUE_READ_BITS;	844 state = restoreState() \| VALUE_READ_BITS;

323 position++;	845 position++;

324 break;	846 break;

325 default:	847 default:

326 if ((state & ALLOW_VALUE_MASK) != 0) fail(position);	848 if ((state & ALLOW_VALUE_MASK) != 0) fail(position);

	849 state \|= VALUE_READ_BITS;

327 position = parseNumber(char, position);	850 position = parseNumber(char, position);

328 state \|= VALUE_READ_BITS;

329 break;	851 break;

330 }	852 }

331 }	853 }

332 if (state != STATE_END) fail(position);	854 this.state = state;

333 }	855 }

334	856

335 /**	857 /**

336 * Parses a "true" literal starting at [position].	858 * Parses a "true" literal starting at [position].

337 *	859 *

338 * [:source[position]:] must be "t".	860 * [:source[position]:] must be "t".

339 */	861 */

340 int parseTrue(int position) {	862 int parseTrue(int position) {

341 assert(source.codeUnitAt(position) == CHAR_t);	863 assert(getChar(position) == CHAR_t);

342 if (source.length < position + 4) fail(position, "Unexpected identifier");	864 if (chunkEnd < position + 4) {

343 if (source.codeUnitAt(position + 1) != CHAR_r \|\|	865 return parseKeywordPrefix(position, "true", KWD_TRUE);

344 source.codeUnitAt(position + 2) != CHAR_u \|\|	866 }

345 source.codeUnitAt(position + 3) != CHAR_e) {	867 if (getChar(position + 1) != CHAR_r \|\|

346 fail(position);	868 getChar(position + 2) != CHAR_u \|\|

	869 getChar(position + 3) != CHAR_e) {

	870 return fail(position);

347 }	871 }

348 listener.handleBool(true);	872 listener.handleBool(true);

349 return position + 4;	873 return position + 4;

350 }	874 }

351	875

352 /**	876 /**

353 * Parses a "false" literal starting at [position].	877 * Parses a "false" literal starting at [position].

354 *	878 *

355 * [:source[position]:] must be "f".	879 * [:source[position]:] must be "f".

356 */	880 */

357 int parseFalse(int position) {	881 int parseFalse(int position) {

358 assert(source.codeUnitAt(position) == CHAR_f);	882 assert(getChar(position) == CHAR_f);

359 if (source.length < position + 5) fail(position, "Unexpected identifier");	883 if (chunkEnd < position + 5) {

360 if (source.codeUnitAt(position + 1) != CHAR_a \|\|	884 return parseKeywordPrefix(position, "false", KWD_FALSE);

361 source.codeUnitAt(position + 2) != CHAR_l \|\|	885 }

362 source.codeUnitAt(position + 3) != CHAR_s \|\|	886 if (getChar(position + 1) != CHAR_a \|\|

363 source.codeUnitAt(position + 4) != CHAR_e) {	887 getChar(position + 2) != CHAR_l \|\|

364 fail(position);	888 getChar(position + 3) != CHAR_s \|\|

	889 getChar(position + 4) != CHAR_e) {

	890 return fail(position);

365 }	891 }

366 listener.handleBool(false);	892 listener.handleBool(false);

367 return position + 5;	893 return position + 5;

368 }	894 }

369	895

370 /**	896 /**

371 * Parses a "null" literal starting at [position].	897 * Parses a "null" literal starting at [position].

372 *	898 *

373 * [:source[position]:] must be "n".	899 * [:source[position]:] must be "n".

374 */	900 */

375 int parseNull(int position) {	901 int parseNull(int position) {

376 assert(source.codeUnitAt(position) == CHAR_n);	902 assert(getChar(position) == CHAR_n);

377 if (source.length < position + 4) fail(position, "Unexpected identifier");	903 if (chunkEnd < position + 4) {

378 if (source.codeUnitAt(position + 1) != CHAR_u \|\|	904 return parseKeywordPrefix(position, "null", KWD_NULL);

379 source.codeUnitAt(position + 2) != CHAR_l \|\|	905 }

380 source.codeUnitAt(position + 3) != CHAR_l) {	906 if (getChar(position + 1) != CHAR_u \|\|

381 fail(position);	907 getChar(position + 2) != CHAR_l \|\|

	908 getChar(position + 3) != CHAR_l) {

	909 return fail(position);

382 }	910 }

383 listener.handleNull();	911 listener.handleNull();

384 return position + 4;	912 return position + 4;

385 }	913 }

386	914

	915 int parseKeywordPrefix(int position, String chars, int type) {

	916 assert(getChar(position) == chars.codeUnitAt(0));

	917 int length = chunkEnd;

	918 int start = position;

	919 int count = 1;

	920 while (++position < length) {

	921 int char = getChar(position);

	922 if (char != chars.codeUnitAt(count)) return fail(start);

	923 count++;

	924 }

	925 this.partialState = PARTIAL_KEYWORD \| type \| (count << KWD_COUNT_SHIFT);

	926 return length;

	927 }

	928

387 /**	929 /**

388 * Parses a string value.	930 * Parses a string value.

389 *	931 *

390 * Initial [position] is right after the initial quote.	932 * Initial [position] is right after the initial quote.

391 * Returned position right after the final quote.	933 * Returned position right after the final quote.

392 */	934 */

393 int parseString(int position) {	935 int parseString(int position) {

394 // Format: '"'([^\x00-\x1f\\\"]\|'\\'[bfnrt/\\"])*'"'	936 // Format: '"'([^\x00-\x1f\\\"]\|'\\'[bfnrt/\\"])*'"'

395 // Initial position is right after first '"'.	937 // Initial position is right after first '"'.

396 int start = position;	938 int start = position;

397 while (position < source.length) {	939 int end = chunkEnd;

398 int char = source.codeUnitAt(position++);	940 while (position < end) {

	941 int char = getChar(position++);

399 // BACKSLASH is larger than QUOTE and SPACE.	942 // BACKSLASH is larger than QUOTE and SPACE.

400 if (char > BACKSLASH) {	943 if (char > BACKSLASH) {

401 continue;	944 continue;

402 }	945 }

403 if (char == BACKSLASH) {	946 if (char == BACKSLASH) {

404 return parseStringWithEscapes(start, position - 1);	947 beginString();

	948 addSliceToString(start, position - 1);

	949 return parseStringToBuffer(position - 1);

405 }	950 }

406 if (char == QUOTE) {	951 if (char == QUOTE) {

407 listener.handleString(source.substring(start, position - 1));	952 listener.handleString(getString(start, position - 1));

408 return position;	953 return position;

409 }	954 }

410 if (char < SPACE) {	955 if (char < SPACE) {

411 fail(position - 1, "Control character in string");	956 fail(position - 1, "Control character in string");

412 }	957 }

413 }	958 }

414 fail(start - 1, "Unterminated string");	959 beginString();

415 }	960 addSliceToString(start, end);

416	961 return chunkString(STR_PLAIN);

417 int parseStringWithEscapes(start, position) {	962 }

418 // Backslash escape detected. Collect character codes for rest of string.	963

419 int firstEscape = position;	964 /**

420 List<int> chars = <int>[];	965 * Sets up a partial string state.

421 for (int i = start; i < firstEscape; i++) {	966 *

422 chars.add(source.codeUnitAt(i));	967 * The state is either not inside an escape, or right after a backslash.

423 }	968 * For partial strings ending inside a Unicode escape, use

424 position++;	969 * [chunkStringEscapeU].

	970 */

	971 int chunkString(int stringState) {

	972 partialState = PARTIAL_STRING \| stringState;

	973 return chunkEnd;

	974 }

	975

	976 /**

	977 * Sets up a partial string state for a partially parsed Unicode escape.

	978 *

	979 * The partial string state includes the current [buffer] and the

	980 * number of hex digits of the Unicode seen so far (e.g., for `"\u30')

	981 * the state knows that two digits have been seen, and what their value is.

	982 *

	983 * Returns [chunkEnd] so it can be used as part of a return statement.

	984 */

	985 int chunkStringEscapeU(int count, int value) {

	986 partialState = PARTIAL_STRING \| STR_U \|

	987 (count << STR_U_COUNT_SHIFT) \|

	988 (value << STR_U_VALUE_SHIFT);

	989 return chunkEnd;

	990 }

	991

	992 /**

	993 * Parses the remainder of a string literal into a buffer.

	994 *

	995 * The buffer is stored in [buffer] and its underlying format depends on

	996 * the input chunk type. For example UTF-8 decoding happens in the

	997 * buffer, not in the parser, since all significant JSON characters are ASCII.

	998 *

	999 * This function scans through the string literal for escapes, and copies

	1000 * slices of non-escape characters using [addSliceToString].

	1001 */

	1002 int parseStringToBuffer(position) {

	1003 int end = chunkEnd;

	1004 int start = position;

425 while (true) {	1005 while (true) {

426 if (position == source.length) {	1006 if (position == end) {

427 fail(start - 1, "Unterminated string");	1007 if (position > start) {

428 }	1008 addSliceToString(start, position);

429 int char = source.codeUnitAt(position);	1009 }

430 switch (char) {	1010 return chunkString(STR_PLAIN);

431 case CHAR_b: char = BACKSPACE; break;	1011 }

432 case CHAR_f: char = FORM_FEED; break;	1012 int char = getChar(position++);

433 case CHAR_n: char = NEWLINE; break;	1013 if (char > BACKSLASH) continue;

434 case CHAR_r: char = CARRIAGE_RETURN; break;	1014 if (char < SPACE) {

435 case CHAR_t: char = TAB; break;	1015 fail(position - 1); // Control character in string.

436 case SLASH:	1016 return;

437 case BACKSLASH:	1017 }

438 case QUOTE:	1018 if (char == QUOTE) {

439 break;	1019 int quotePosition = position - 1;

440 case CHAR_u:	1020 if (quotePosition > start) {

441 int hexStart = position - 1;	1021 addSliceToString(start, quotePosition);

442 int value = 0;	1022 }

443 for (int i = 0; i < 4; i++) {	1023 listener.handleString(endString());

444 position++;	1024 return position;

445 if (position == source.length) {	1025 }

446 fail(start - 1, "Unterminated string");	1026 if (char != BACKSLASH) {

	1027 continue;

	1028 }

	1029 // Handle escape.

	1030 if (position - 1 > start) {

	1031 addSliceToString(start, position - 1);

	1032 }

	1033 if (position == end) return chunkString(STR_ESCAPE);

	1034 position = parseStringEscape(position);

	1035 if (position == end) return position;

	1036 start = position;

	1037 }

	1038 return -1; // UNREACHABLE.

	1039 }

	1040

	1041 /**

	1042 * Parse a string escape.

	1043 *

	1044 * Position is right after the initial backslash.

	1045 * The following escape is parsed into a character code which is added to

	1046 * the current string buffer using [addCharToString].

	1047 *

	1048 * Returns position after the last character of the escape.

	1049 */

	1050 int parseStringEscape(int position) {

	1051 int char = getChar(position++);

	1052 int length = chunkEnd;

	1053 switch (char) {

	1054 case CHAR_b: char = BACKSPACE; break;

	1055 case CHAR_f: char = FORM_FEED; break;

	1056 case CHAR_n: char = NEWLINE; break;

	1057 case CHAR_r: char = CARRIAGE_RETURN; break;

	1058 case CHAR_t: char = TAB; break;

	1059 case SLASH:

	1060 case BACKSLASH:

	1061 case QUOTE:

	1062 break;

	1063 case CHAR_u:

	1064 int hexStart = position - 1;

	1065 int value = 0;

	1066 for (int i = 0; i < 4; i++) {

	1067 if (position == length) return chunkStringEscapeU(i, value);

	1068 char = getChar(position++);

	1069 int digit = char ^ 0x30;

	1070 value *= 16;

	1071 if (digit <= 9) {

	1072 value += digit;

	1073 } else {

	1074 digit = (char \| 0x20) - CHAR_a;

	1075 if (digit < 0 \|\| digit > 5) {

	1076 return fail(hexStart, "Invalid unicode escape");

447 }	1077 }

448 char = source.codeUnitAt(position);	1078 value += digit + 10;

449 char -= 0x30;

450 if (char < 0) fail(hexStart, "Invalid unicode escape");

451 if (char < 10) {

452 value = value * 16 + char;

453 } else {

454 char = (char \| 0x20) - 0x31;

455 if (char < 0 \|\| char > 5) {

456 fail(hexStart, "Invalid unicode escape");

457 }

458 value = value * 16 + char + 10;

459 }

460 }	1079 }

461 char = value;

462 break;

463 default:

464 if (char < SPACE) fail(position, "Control character in string");

465 fail(position, "Unrecognized string escape");

466 }

467 do {

468 chars.add(char);

469 position++;

470 if (position == source.length) fail(start - 1, "Unterminated string");

471 char = source.codeUnitAt(position);

472 if (char == QUOTE) {

473 String result = new String.fromCharCodes(chars);

474 listener.handleString(result);

475 return position + 1;

476 }	1080 }

477 if (char < SPACE) {	1081 char = value;

478 fail(position, "Control character in string");	1082 break;

479 }	1083 default:

480 } while (char != BACKSLASH);	1084 if (char < SPACE) return fail(position, "Control character in string");

481 position++;	1085 return fail(position, "Unrecognized string escape");

482 }	1086 }

	1087 addCharToString(char);

	1088 if (position == length) return chunkString(STR_PLAIN);

	1089 return position;

	1090 }

	1091

	1092 /// Sets up a partial numeral state.

	1093 /// Returns chunkEnd to allow easy one-line bailout tests.

	1094 int beginChunkNumber(int state, int start) {

	1095 int end = chunkEnd;

	1096 int length = end - start;

	1097 var buffer = new _NumberBuffer(length);

	1098 copyCharsToList(start, end, buffer.list, 0);

	1099 buffer.length = length;

	1100 this.buffer = buffer;

	1101 this.partialState = PARTIAL_NUMERAL \| state;

	1102 return end;

	1103 }

	1104

	1105 void addNumberChunk(_NumberBuffer buffer, int start, int end, int overhead) {

	1106 int length = end - start;

	1107 int count = buffer.length;

	1108 int newCount = count + length;

	1109 int newCapacity = newCount + overhead;

	1110 buffer.ensureCapacity(newCapacity);

	1111 copyCharsToList(start, end, buffer.list, count);

	1112 buffer.length = newCount;

	1113 }

	1114

	1115 // Continues an already chunked number accross an entire chunk.

	1116 int continueChunkNumber(int state, int start, _NumberBuffer buffer) {

	1117 int end = chunkEnd;

	1118 addNumberChunk(buffer, start, end, _NumberBuffer.kDefaultOverhead);

	1119 this.buffer = buffer;

	1120 this.partialState = PARTIAL_NUMERAL \| state;

	1121 return end;

	1122 }

	1123

	1124 int finishChunkNumber(int state, int start, int end, _NumberBuffer buffer) {

	1125 if (state == NUM_ZERO) {

	1126 listener.handleNumber(0);

	1127 return;

	1128 }

	1129 if (end > start) {

	1130 addNumberChunk(buffer, start, end, 0);

	1131 }

	1132 if (state == NUM_DIGIT) {

	1133 listener.handleNumber(buffer.parseInt());

	1134 } else if (state == NUM_DOT_DIGIT \|\| state == NUM_E_DIGIT) {

	1135 listener.handleNumber(buffer.parseDouble());

	1136 } else {

	1137 fail(chunkEnd, "Unterminated number literal");

	1138 }

	1139 return end;

483 }	1140 }

484	1141

485 int parseNumber(int char, int position) {	1142 int parseNumber(int char, int position) {

486 // Also called on any unexpected character.	1143 // Also called on any unexpected character.

487 // Format:	1144 // Format:

488 // '-'?('0'\|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)?	1145 // '-'?('0'\|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)?

489 int start = position;	1146 int start = position;

490 int length = source.length;	1147 int length = chunkEnd;

491 int intValue = 0; // Collect int value while parsing.	1148 // Collects an int value while parsing. Used for both an integer literal,

492 int intSign = 1;	1149 // and the exponent part of a double literal.

	1150 int intValue = 0;

	1151 double doubleValue = 0.0; // Collect double value while parsing.

	1152 int sign = 1;

493 bool isDouble = false;	1153 bool isDouble = false;

494 // Break this block when the end of the number literal is reached.	1154 // Break this block when the end of the number literal is reached.

495 // At that time, position points to the next character, and isDouble	1155 // At that time, position points to the next character, and isDouble

496 // is set if the literal contains a decimal point or an exponential.	1156 // is set if the literal contains a decimal point or an exponential.

497 parsing: {	1157 parsing: {

498 if (char == MINUS) {	1158 if (char == MINUS) {

499 intSign = -1;	1159 sign = -1;

500 position++;	1160 position++;

501 if (position == length) fail(position, "Missing expected digit");	1161 if (position == length) return beginChunkNumber(NUM_SIGN, start);

502 char = source.codeUnitAt(position);	1162 char = getChar(position);

503 }	1163 }

504 if (char < CHAR_0 \|\| char > CHAR_9) {	1164 int digit = char ^ CHAR_0;

505 if (intSign < 0) {	1165 if (digit > 9) {

	1166 if (sign < 0) {

506 fail(position, "Missing expected digit");	1167 fail(position, "Missing expected digit");

507 } else {	1168 } else {

508 // If it doesn't even start out as a numeral.	1169 // If it doesn't even start out as a numeral.

509 fail(position, "Unexpected character");	1170 fail(position, "Unexpected character");

510 }	1171 }

511 }	1172 }

512 if (char == CHAR_0) {	1173 if (digit == 0) {

513 position++;	1174 position++;

514 if (position == length) break parsing;	1175 if (position == length) return beginChunkNumber(NUM_ZERO, start);

515 char = source.codeUnitAt(position);	1176 char = getChar(position);

516 if (CHAR_0 <= char && char <= CHAR_9) {	1177 digit = char ^ CHAR_0;

517 fail(position);	1178 // If starting with zero, next character must not be digit.

518 }	1179 if (digit <= 9) fail(position);

519 } else {	1180 } else {

520 do {	1181 do {

521 intValue = intValue * 10 + (char - CHAR_0);	1182 intValue = 10 * intValue + digit;

522 position++;	1183 position++;

523 if (position == length) break parsing;	1184 if (position == length) return beginChunkNumber(NUM_DIGIT, start);

524 char = source.codeUnitAt(position);	1185 char = getChar(position);

525 } while (CHAR_0 <= char && char <= CHAR_9);	1186 digit = char ^ CHAR_0;

	1187 } while (digit <= 9);

526 }	1188 }

527 if (char == DECIMALPOINT) {	1189 if (char == DECIMALPOINT) {

528 isDouble = true;	1190 isDouble = true;

	1191 doubleValue = intValue.toDouble();

	1192 intValue = 0;

529 position++;	1193 position++;

530 if (position == length) fail(position, "Missing expected digit");	1194 if (position == length) return beginChunkNumber(NUM_DOT, start);

531 char = source.codeUnitAt(position);	1195 char = getChar(position);

532 if (char < CHAR_0 \|\| char > CHAR_9) fail(position);	1196 digit = char ^ CHAR_0;

	1197 if (digit > 9) fail(position);

533 do {	1198 do {

	1199 doubleValue = 10.0 * doubleValue + digit;

	1200 intValue -= 1;

534 position++;	1201 position++;

535 if (position == length) break parsing;	1202 if (position == length) return beginChunkNumber(NUM_DOT_DIGIT, start);

536 char = source.codeUnitAt(position);	1203 char = getChar(position);

537 } while (CHAR_0 <= char && char <= CHAR_9);	1204 digit = char ^ CHAR_0;

538 }	1205 } while (digit <= 9);

539 if (char == CHAR_e \|\| char == CHAR_E) {	1206 }

540 isDouble = true;	1207 if ((char \| 0x20) == CHAR_e) {

	1208 if (!isDouble) {

	1209 doubleValue = intValue.toDouble();

	1210 intValue = 0;

	1211 isDouble = true;

	1212 }

541 position++;	1213 position++;

542 if (position == length) fail(position, "Missing expected digit");	1214 if (position == length) return beginChunkNumber(NUM_E, start);

543 char = source.codeUnitAt(position);	1215 char = getChar(position);

	1216 int expSign = 1;

	1217 int exponent = 0;

544 if (char == PLUS \|\| char == MINUS) {	1218 if (char == PLUS \|\| char == MINUS) {

	1219 expSign = 0x2C - char; // -1 for MINUS, +1 for PLUS

545 position++;	1220 position++;

546 if (position == length) fail(position, "Missing expected digit");	1221 if (position == length) return beginChunkNumber(NUM_E_SIGN, start);

547 char = source.codeUnitAt(position);	1222 char = getChar(position);

548 }	1223 }

549 if (char < CHAR_0 \|\| char > CHAR_9) {	1224 digit = char ^ CHAR_0;

	1225 if (digit > 9) {

550 fail(position, "Missing expected digit");	1226 fail(position, "Missing expected digit");

551 }	1227 }

552 do {	1228 do {

	1229 exponent = 10 * exponent + digit;

553 position++;	1230 position++;

554 if (position == length) break parsing;	1231 if (position == length) return beginChunkNumber(NUM_E_DIGIT, start);

555 char = source.codeUnitAt(position);	1232 char = getChar(position);

556 } while (CHAR_0 <= char && char <= CHAR_9);	1233 digit = char ^ CHAR_0;

	1234 } while (digit <= 9);

	1235 intValue += expSign * exponent;

557 }	1236 }

558 }	1237 }

559 if (!isDouble) {	1238 if (!isDouble) {

560 listener.handleNumber(intSign * intValue);	1239 listener.handleNumber(sign * intValue);

561 return position;	1240 return position;

562 }	1241 }

563 // This correctly creates -0.0 for doubles.	1242 // Double values at or above this value (2**53) may have lost precision.

564 listener.handleNumber(_parseDouble(source, start, position));	1243 // Only trust results that are below this value.

	1244 const double maxExactDouble = 9007199254740992.0;

	1245 if (doubleValue < maxExactDouble) {

	1246 int exponent = intValue;

	1247 double signedMantissa = doubleValue * sign;

	1248 if (exponent >= -22) {

	1249 if (exponent < 0) {

	1250 listener.handleNumber(signedMantissa / POWERS_OF_TEN[-exponent]);

	1251 return position;

	1252 }

	1253 if (exponent == 0) {

	1254 listener.handleNumber(signedMantissa);

	1255 return position;

	1256 }

	1257 if (exponent <= 22) {

	1258 listener.handleNumber(signedMantissa * POWERS_OF_TEN[exponent]);

	1259 return position;

	1260 }

	1261 }

	1262 }

	1263 // If the value is outside the range +/-maxExactDouble or

	1264 // exponent is outside the range +/-22, then we can't trust simple double

	1265 // arithmetic to get the exact result, so we use the system double parsing.

	1266 listener.handleNumber(parseDouble(start, position));

565 return position;	1267 return position;

566 }	1268 }

567	1269

568 static double _parseDouble(String source, int start, int end)	1270 int fail(int position, [String message]) {

569 native "Double_parse";	1271 if (message == null) {

570	1272 message = "Unexpected character";

571 void fail(int position, [String message]) {	1273 if (position == chunkEnd) message = "Unexpected end of input";

572 if (message == null) message = "Unexpected character";	1274 }

573 throw new FormatException(message, source, position);	1275 throw new FormatException(message, chunk, position);

574 }	1276 }

575 }	1277 }

	1278

	1279 /**

	1280 * Chunked JSON parser that parses [String] chunks.

	1281 */

	1282 class _JsonStringParser extends _ChunkedJsonParser {

	1283 String chunk;

	1284 int chunkEnd;

	1285

	1286 _JsonStringParser(_JsonListener listener) : super(listener);

	1287

	1288 int getChar(int position) => chunk.codeUnitAt(position);

	1289

	1290 String getString(int start, int end) {

	1291 return chunk.substring(start, end);

	1292 }

	1293

	1294 void beginString() {

	1295 this.buffer = new StringBuffer();

	1296 }

	1297

	1298 void addSliceToString(int start, int end) {

	1299 StringBuffer buffer = this.buffer;

	1300 buffer.write(chunk.substring(start, end));

	1301 }

	1302

	1303 void addCharToString(int charCode) {

	1304 StringBuffer buffer = this.buffer;

	1305 buffer.writeCharCode(charCode);

	1306 }

	1307

	1308 String endString() {

	1309 StringBuffer buffer = this.buffer;

	1310 this.buffer = null;

	1311 return buffer.toString();

	1312 }

	1313

	1314 void copyCharsToList(int start, int end, List target, int offset) {

	1315 int length = end - start;

	1316 for (int i = 0; i < length; i++) {

	1317 target[offset + i] = chunk.codeUnitAt(start + i);

	1318 }

	1319 }

	1320

	1321 double parseDouble(int start, int end) {

	1322 return _parseDouble(chunk, start, end);

	1323 }

	1324 }

	1325

	1326 patch class JsonDecoder {

	1327 /* patch */ StringConversionSink startChunkedConversion(Sink<Object> sink) {

	1328 return new _JsonStringDecoderSink(this._reviver, sink);

	1329 }

	1330 }

	1331

	1332 /**

	1333 * Implements the chunked conversion from a JSON string to its corresponding

	1334 * object.

	1335 *

	1336 * The sink only creates one object, but its input can be chunked.

	1337 */

	1338 class _JsonStringDecoderSink extends StringConversionSinkBase {

	1339 _ChunkedJsonParser _parser;

	1340 Function _reviver;

	1341 final Sink<Object> _sink;

	1342

	1343 _JsonStringDecoderSink(reviver, this._sink)

	1344 : _reviver = reviver, _parser = _createParser(reviver);

	1345

	1346 static _ChunkedJsonParser _createParser(reviver) {

	1347 _BuildJsonListener listener;

	1348 if (reviver == null) {

	1349 listener = new _BuildJsonListener();

	1350 } else {

	1351 listener = new _ReviverJsonListener(reviver);

	1352 }

	1353 return new _JsonStringParser(listener);

	1354 }

	1355

	1356 void addSlice(String chunk, int start, int end, bool isLast) {

	1357 _parser.chunk = chunk;

	1358 _parser.chunkEnd = end;

	1359 _parser.parse(start);

	1360 if (isLast) _parser.close();

	1361 }

	1362

	1363 void add(String chunk) {

	1364 addSlice(chunk, 0, chunk.length, false);

	1365 }

	1366

	1367 void close() {

	1368 _parser.close();

	1369 var decoded = _parser.result;

	1370 _sink.add(decoded);

	1371 _sink.close();

	1372 }

	1373

	1374 Utf8ConversionSink asUtf8Sink(bool allowMalformed) {

	1375 _parser = null;

	1376 return new _JsonUtf8DecoderSink(_reviver, _sink, allowMalformed);

	1377 }

	1378 }

	1379

	1380 class _Utf8StringBuffer {

	1381 static const int INITIAL_CAPACITY = 32;

	1382 // Partial state encoding.

	1383 static const int MASK_TWO_BIT = 0x03;

	1384 static const int MASK_SIZE = MASK_TWO_BIT;

	1385 static const int SHIFT_MISSING = 2;

	1386 static const int SHIFT_VALUE = 4;

	1387 static const int NO_PARTIAL = 0;

	1388

	1389 // UTF-8 encoding and limits.

	1390 static const int MAX_ASCII = 127;

	1391 static const int MAX_TWO_BYTE = 0x7ff;

	1392 static const int MAX_THREE_BYTE = 0xffff;

	1393 static const int MAX_UNICODE = 0X10ffff;

	1394 static const int MASK_TWO_BYTE = 0x1f;

	1395 static const int MASK_THREE_BYTE = 0x0f;

	1396 static const int MASK_FOUR_BYTE = 0x07;

	1397 static const int MASK_CONTINUE_TAG = 0xC0;

	1398 static const int MASK_CONTINUE_VALUE = 0x3f;

	1399 static const int CONTINUE_TAG = 0x80;

	1400

	1401 // UTF-16 surrogate encoding.

	1402 static const int LEAD_SURROGATE = 0xD800;

	1403 static const int TAIL_SURROGATE = 0xDC00;

	1404 static const int SHIFT_HIGH_SURROGATE = 10;

	1405 static const int MASK_LOW_SURROGATE = 0x3ff;

	1406

	1407 // The internal buffer starts as Uint8List, but may change to Uint16List

	1408 // if the string contains non-Latin-1 characters.

	1409 List<int> buffer = new Uint8List(INITIAL_CAPACITY);

	1410 // Number of elements in buffer.

	1411 int length = 0;

	1412 // Partial decoding state, for cases where an UTF-8 sequences is split

	1413 // between chunks.

	1414 int partialState = NO_PARTIAL;

	1415 // Whether all characters so far have been Latin-1 (and the buffer is

	1416 // still a Uint8List). Set to false when the first non-Latin-1 character

	1417 // is encountered, and the buffer is then also converted to a Uint16List.

	1418 bool isLatin1 = true;

	1419 // If allowing malformed, invalid UTF-8 sequences are converted to

	1420 // U+FFFD.

	1421 bool allowMalformed;

	1422

	1423 _Utf8StringBuffer(this.allowMalformed);

	1424

	1425 /**

	1426 * Parse the continuation of a multi-byte UTF-8 sequence.

	1427 *

	1428 * Parse [utf8] from [position] to [end]. If the sequence extends beyond

	1429 * `end`, store the partial state in [partialState], and continue from there

	1430 * on the next added slice.

	1431 *

	1432 * The [size] is the number of expected continuation bytes total,

	1433 * and [missing] is the number of remaining continuation bytes.

	1434 * The [size] is used to detect overlong encodings.

	1435 * The [value] is the value collected so far.

	1436 *

	1437 * When called after seeing the first multi-byte marker, the [size] and

	1438 * [missing] values are always the same, but they may differ if continuing

	1439 * after a partial sequence.

	1440 */

	1441 int addContinuation(List<int> utf8, int position, int end,

	1442 int size, int missing, int value) {

	1443 int codeEnd = position + missing;

	1444 do {

	1445 if (position == end) {

	1446 missing = codeEnd - position;

	1447 partialState =

	1448 size \| (missing << SHIFT_MISSING) \| (value << SHIFT_VALUE);

	1449 return end;

	1450 }

	1451 int char = utf8[position];

	1452 if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) {

	1453 if (allowMalformed) {

	1454 addCharCode(0xFFFD);

	1455 return position;

	1456 }

	1457 throw new FormatException("Expected UTF-8 continuation byte, "

	1458 "found $char", utf8, position);

	1459 }

	1460 value = 64 * value + (char & MASK_CONTINUE_VALUE);

	1461 position++;

	1462 } while (position < codeEnd);

	1463 if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) {

	1464 // Over-long encoding.

	1465 if (allowMalformed) {

	1466 value = 0xFFFD;

	1467 } else {

	1468 throw new FormatException(

	1469 "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}"

	1470 " encoded in ${size + 1} bytes.", utf8, position - 1);

	1471 }

	1472 }

	1473 addCharCode(value);

	1474 return position;

	1475 }

	1476

	1477 void addCharCode(int char) {

	1478 assert(char >= 0);

	1479 assert(char <= MAX_UNICODE);

	1480 if (partialState != NO_PARTIAL) {

	1481 if (allowMalformed) {

	1482 partialState = NO_PARTIAL;

	1483 addCharCode(0xFFFD);

	1484 } else {

	1485 throw new FormatException("Incomplete UTF-8 sequence", utf8);

	1486 }

	1487 }

	1488 if (isLatin1 && char > 0xff) {

	1489 _to16Bit(); // Also grows a little if close to full.

	1490 }

	1491 int length = this.length;

	1492 if (char <= MAX_THREE_BYTE) {

	1493 if (length == buffer.length) _grow();

	1494 buffer[length] = char;

	1495 this.length = length + 1;

	1496 return;

	1497 }

	1498 if (length + 2 > buffer.length) _grow();

	1499 int bits = char - 0x10000;

	1500 buffer[length] = LEAD_SURROGATE \| (bits >> SHIFT_HIGH_SURROGATE);

	1501 buffer[length + 1] = TAIL_SURROGATE \| (bits & MASK_LOW_SURROGATE);

	1502 this.length = length + 2;

	1503 }

	1504

	1505 void _to16Bit() {

	1506 assert(isLatin1);

	1507 Uint16List newBuffer;

	1508 if ((length + INITIAL_CAPACITY) * 2 <= buffer.length) {

	1509 // Reuse existing buffer if it's big enough.

	1510 newBuffer = new Uint16List.view(buffer.buffer);

	1511 } else {

	1512 int newCapacity = buffer.length;

	1513 if (newCapacity - length < INITIAL_CAPACITY) {

	1514 newCapacity = length + INITIAL_CAPACITY;

	1515 }

	1516 newBuffer = new Uint16List(newCapacity);

	1517 }

	1518 newBuffer.setRange(0, length, buffer);

	1519 buffer = newBuffer;

	1520 isLatin1 = false;

	1521 }

	1522

	1523 void _grow() {

	1524 int newCapacity = buffer.length * 2;

	1525 List newBuffer;

	1526 if (isLatin1) {

	1527 newBuffer = new Uint8List(newCapacity);

	1528 } else {

	1529 newBuffer = new Uint16List(newCapacity);

	1530 }

	1531 newBuffer.setRange(0, length, buffer);

	1532 buffer = newBuffer;

	1533 }

	1534

	1535 void addSlice(List<int> utf8, int position, int end) {

	1536 assert(position < end);

	1537 if (partialState > 0) {

	1538 int continueByteCount = (partialState & MASK_TWO_BIT);

	1539 int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;

	1540 int value = partialState >> SHIFT_VALUE;

	1541 partialState = NO_PARTIAL;

	1542 position = addContinuation(utf8, position, end,

	1543 continueByteCount, missing, value);

	1544 if (position == end) return;

	1545 }

	1546 // Keep index and capacity in local variables while looping over

	1547 // ASCII characters.

	1548 int index = length;

	1549 int capacity = buffer.length;

	1550 while (position < end) {

	1551 int char = utf8[position];

	1552 if (char <= MAX_ASCII) {

	1553 if (index == capacity) {

	1554 length = index;

	1555 _grow();

	1556 capacity = buffer.length;

	1557 }

	1558 buffer[index++] = char;

	1559 position++;

	1560 continue;

	1561 }

	1562 length = index;

	1563 if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) {

	1564 if (allowMalformed) {

	1565 addCharCode(0xFFFD);

	1566 position++;

	1567 } else {

	1568 throw new FormatException("Unexepected UTF-8 continuation byte",

	1569 utf8, position);

	1570 }

	1571 } else if (char < 0xE0) { // C0-DF

	1572 // Two-byte.

	1573 position = addContinuation(utf8, position + 1, end, 1, 1,

	1574 char & MASK_TWO_BYTE);

	1575 } else if (char < 0xF0) { // E0-EF

	1576 // Three-byte.

	1577 position = addContinuation(utf8, position + 1, end, 2, 2,

	1578 char & MASK_THREE_BYTE);

	1579 } else if (char < 0xF8) { // F0-F7

	1580 // Four-byte.

	1581 position = addContinuation(utf8, position + 1, end, 3, 3,

	1582 char & MASK_FOUR_BYTE);

	1583 } else {

	1584 if (allowMalformed) {

	1585 addCharCode(0xFFFD);

	1586 position++;

	1587 } else {

	1588 throw new FormatException("Invalid UTF-8 byte: $char",

	1589 utf8, position);

	1590 }

	1591 }

	1592 index = length;

	1593 capacity = buffer.length;

	1594 }

	1595 length = index;

	1596 }

	1597

	1598 String toString() {

	1599 if (partialState != NO_PARTIAL) {

	1600 if (allowMalformed) {

	1601 partialState = NO_PARTIAL;

	1602 addCharCode(0xFFFD);

	1603 } else {

	1604 int continueByteCount = (partialState & MASK_TWO_BIT);

	1605 int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;

	1606 int value = partialState >> SHIFT_VALUE;

	1607 int seenByteCount = continueByteCount - missing + 1;

	1608 List source = new Uint8List(seenByteCount);

	1609 while (seenByteCount > 1) {

	1610 seenByteCount--;

	1611 source[seenByteCount] = CONTINUE_TAG \| (value & MASK_CONTINUE_VALUE);

	1612 value >>= 6;

	1613 }

	1614 source[0] = value \| (0x3c0 >> (continueByteCount - 1));

	1615 throw new FormatException("Incomplete UTF-8 sequence",

	1616 source, source.length);

	1617 }

	1618 }

	1619 return new String.fromCharCodes(buffer, 0, length);

	1620 }

	1621 }

	1622

	1623 /**

	1624 * Chunked JSON parser that parses UTF-8 chunks.

	1625 */

	1626 class _JsonUtf8Parser extends _ChunkedJsonParser {

	1627 final bool allowMalformed;

	1628 List<int> chunk;

	1629 int chunkEnd;

	1630

	1631 _JsonUtf8Parser(_JsonListener listener, this.allowMalformed)

	1632 : super(listener);

	1633

	1634 int getChar(int position) => chunk[position];

	1635

	1636 String getString(int start, int end) {

	1637 beginString();

	1638 addSliceToString(start, end);

	1639 String result = endString();

	1640 return result;

	1641 }

	1642

	1643 void beginString() {

	1644 this.buffer = new _Utf8StringBuffer(allowMalformed);

	1645 }

	1646

	1647 void addSliceToString(int start, int end) {

	1648 _Utf8StringBuffer buffer = this.buffer;

	1649 buffer.addSlice(chunk, start, end);

	1650 }

	1651

	1652 void addCharToString(int charCode) {

	1653 _Utf8StringBuffer buffer = this.buffer;

	1654 buffer.addCharCode(charCode);

	1655 }

	1656

	1657 String endString() {

	1658 _Utf8StringBuffer buffer = this.buffer;

	1659 this.buffer = null;

	1660 return buffer.toString();

	1661 }

	1662

	1663 void copyCharsToList(int start, int end, List target, int offset) {

	1664 int length = end - start;

	1665 target.setRange(offset, offset + length, chunk, start);

	1666 }

	1667

	1668 double parseDouble(int start, int end) {

	1669 String string = getString(start, end);

	1670 reutrn _parseDouble(string, 0, string.length);

	1671 }

	1672 }

	1673

	1674 double _parseDouble(String source, int start, int end)

	1675 native "Double_parse";

	1676

	1677 /**

	1678 * Implements the chunked conversion from a UTF-8 encoding of JSON

	1679 * to its corresponding object.

	1680 */

	1681 class _JsonUtf8DecoderSink extends ByteConversionSinkBase {

	1682 _ChunkedUtf8Parser _parser;

	1683 final Sink<Object> _sink;

	1684

	1685 _JsonUtf8DecoderSink(reviver, this._sink, bool allowMalformed)

	1686 : _parser = _createParser(reviver, allowMalformed);

	1687

	1688 static _ChunkedJsonParser _createParser(reviver, bool allowMalformed) {

	1689 _BuildJsonListener listener;

	1690 if (reviver == null) {

	1691 listener = new _BuildJsonListener();

	1692 } else {

	1693 listener = new _ReviverJsonListener(reviver);

	1694 }

	1695 return new _JsonUtf8Parser(listener, allowMalformed);

	1696 }

	1697

	1698 void addSlice(List<int> chunk, int start, int end, bool isLast) {

	1699 _addChunk(chunk, start, end);

	1700 if (isLast) close();

	1701 }

	1702

	1703 void add(List<int> chunk) {

	1704 _addChunk(chunk, 0, chunk.length);

	1705 }

	1706

	1707 void _addChunk(List<int> chunk, int start, int end) {

	1708 _parser.chunk = chunk;

	1709 _parser.chunkEnd = end;

	1710 _parser.parse(start);

	1711 }

	1712

	1713 void close() {

	1714 _parser.close();

	1715 var decoded = _parser.result;

	1716 _sink.add(decoded);

	1717 _sink.close();

	1718 }

	1719 }

OLD	NEW

« no previous file with comments | « no previous file | runtime/lib/double_patch.dart » ('j') | no next file with comments »