Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(55)

Side by Side Diff: runtime/lib/convert_patch.dart

Issue 649113005: Make JSON parsing work as a chunked conversion sink. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Address comments. Fix bug. Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | runtime/lib/double_patch.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 import "dart:_internal" show POWERS_OF_TEN;
6
5 // JSON conversion. 7 // JSON conversion.
6 8
7 patch _parseJson(String json, reviver(var key, var value)) { 9 patch _parseJson(String json, reviver(var key, var value)) {
8 _BuildJsonListener listener; 10 _BuildJsonListener listener;
9 if (reviver == null) { 11 if (reviver == null) {
10 listener = new _BuildJsonListener(); 12 listener = new _BuildJsonListener();
11 } else { 13 } else {
12 listener = new _ReviverJsonListener(reviver); 14 listener = new _ReviverJsonListener(reviver);
13 } 15 }
14 new _JsonParser(json, listener).parse(); 16 var parser = new _JsonStringParser(listener);
17 parser.chunk = json;
18 parser.chunkEnd = json.length;
19 parser.parse(0);
20 parser.close();
15 return listener.result; 21 return listener.result;
16 } 22 }
17 23
18 //// Implementation /////////////////////////////////////////////////////////// 24 //// Implementation ///////////////////////////////////////////////////////////
19 25
20 // Simple API for JSON parsing. 26 // Simple API for JSON parsing.
21 27
28 /**
29 * Listener for parsing events from [_ChunkedJsonParser].
30 */
22 abstract class _JsonListener { 31 abstract class _JsonListener {
23 void handleString(String value) {} 32 void handleString(String value) {}
24 void handleNumber(num value) {} 33 void handleNumber(num value) {}
25 void handleBool(bool value) {} 34 void handleBool(bool value) {}
26 void handleNull() {} 35 void handleNull() {}
27 void beginObject() {} 36 void beginObject() {}
28 void propertyName() {} 37 void propertyName() {}
29 void propertyValue() {} 38 void propertyValue() {}
30 void endObject() {} 39 void endObject() {}
31 void beginArray() {} 40 void beginArray() {}
32 void arrayElement() {} 41 void arrayElement() {}
33 void endArray() {} 42 void endArray() {}
34 } 43 }
35 44
36 /** 45 /**
37 * A [JsonListener] that builds data objects from the parser events. 46 * A [_JsonListener] that builds data objects from the parser events.
38 * 47 *
39 * This is a simple stack-based object builder. It keeps the most recently 48 * This is a simple stack-based object builder. It keeps the most recently
40 * seen value in a variable, and uses it depending on the following event. 49 * seen value in a variable, and uses it depending on the following event.
41 */ 50 */
42 class _BuildJsonListener extends _JsonListener { 51 class _BuildJsonListener extends _JsonListener {
43 /** 52 /**
44 * Stack used to handle nested containers. 53 * Stack used to handle nested containers.
45 * 54 *
46 * The current container is pushed on the stack when a new one is 55 * The current container is pushed on the stack when a new one is
47 * started. If the container is a [Map], there is also a current [key] 56 * started. If the container is a [Map], there is also a current [key]
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
128 void propertyValue() { 137 void propertyValue() {
129 value = reviver(key, value); 138 value = reviver(key, value);
130 super.propertyValue(); 139 super.propertyValue();
131 } 140 }
132 141
133 get result { 142 get result {
134 return reviver(null, value); 143 return reviver(null, value);
135 } 144 }
136 } 145 }
137 146
138 class _JsonParser { 147 /**
148 * Buffer holding parts of a numeral.
149 *
150 * The buffer contains the characters of a JSON number.
151 * These are all ASCII, so an [Uint8List] is used as backing store.
152 *
153 * This buffer is used when a JSON number is split between separate chunks.
154 *
155 */
156 class _NumberBuffer {
157 static const int minCapacity = 16;
158 static const int kDefaultOverhead = 5;
159 Uint8List list;
160 int length = 0;
161 _NumberBuffer(int initialCapacity)
162 : list = new Uint8List(_initialCapacity(initialCapacity));
163
164 int get capacity => list.length;
165
166 // Pick an initial capacity greater than the first part's size.
167 // The typical use case has two parts, this is the attempt at
168 // guessing the size of the second part without overdoing it.
169 // The default estimate of the second part is [kDefaultOverhead],
170 // then round to a multiple of four, and return the result,
171 // or [minCapacity] if that is greater.
172 static int _initialCapacity(int minCapacity) {
173 minCapacity += kDefaultOverhead;
174 if (minCapacity < minCapacity) return minCapacity;
175 minCapacity = (minCapacity + 3) & ~3; // Round to multiple of four.
176 return minCapacity;
177 }
178
179 // Grows to the exact size asked for.
180 void ensureCapacity(int newCapacity) {
181 Uint8List list = this.list;
182 if (newCapacity <= list.length) return;
183 Uint8List newList = new Uint8List(newCapacity);
184 newList.setRange(0, list.length, list, 0);
185 this.list = newList;
186 }
187
188 String getString() {
189 var list = this.list;
190 if (length < list.length) {
191 list = new Uint8List.view(list.buffer, 0, length);
192 }
193 String result = new String.fromCharCodes(list);
194 return result;
195 }
196
197 // TODO(lrn): See if parsing of numbers can be abstracted to something
198 // not only working on strings, but also on char-code lists, without losing
199 // performance.
200 int parseInt() => int.parse(getString());
201 double parseDouble() => double.parse(getString());
202 }
203
204 /**
205 * Chunked JSON parser.
206 *
207 * Receives inputs in chunks, gives access to individual parts of the input,
208 * and stores input state between chunks.
209 *
210 * Implementations include [String] and UTF-8 parsers.
211 */
212 abstract class _ChunkedJsonParser {
139 // A simple non-recursive state-based parser for JSON. 213 // A simple non-recursive state-based parser for JSON.
140 // 214 //
141 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON 215 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON
142 // and strings also in OBJECT_EMPTY, OBJECT_COMMA. 216 // and strings also in OBJECT_EMPTY, OBJECT_COMMA.
143 // VALUE STRING : , } ] Transitions to 217 // VALUE STRING : , } ] Transitions to
144 // EMPTY X X -> END 218 // EMPTY X X -> END
145 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop 219 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop
146 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop 220 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop
147 // ARRAY_COMMA X X -> ARRAY_VALUE 221 // ARRAY_COMMA X X -> ARRAY_VALUE
148 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop 222 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop
(...skipping 16 matching lines...) Expand all
165 static const int INSIDE_OBJECT = 2; 239 static const int INSIDE_OBJECT = 2;
166 static const int AFTER_COLON = 3; // Always inside object. 240 static const int AFTER_COLON = 3; // Always inside object.
167 241
168 static const int ALLOW_STRING_MASK = 8; // Allowed if zero. 242 static const int ALLOW_STRING_MASK = 8; // Allowed if zero.
169 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero. 243 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero.
170 static const int ALLOW_VALUE = 0; 244 static const int ALLOW_VALUE = 0;
171 static const int STRING_ONLY = 4; 245 static const int STRING_ONLY = 4;
172 static const int NO_VALUES = 12; 246 static const int NO_VALUES = 12;
173 247
174 // Objects and arrays are "empty" until their first property/element. 248 // Objects and arrays are "empty" until their first property/element.
249 // At this position, they may either have an entry or a close-bracket.
175 static const int EMPTY = 0; 250 static const int EMPTY = 0;
176 static const int NON_EMPTY = 16; 251 static const int NON_EMPTY = 16;
177 static const int EMPTY_MASK = 16; // Empty if zero. 252 static const int EMPTY_MASK = 16; // Empty if zero.
178 253
179
180 static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY; 254 static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY;
181 255
182 // Actual states. 256 // Actual states.
183 static const int STATE_INITIAL = EMPTY | ALLOW_VALUE; 257 static const int STATE_INITIAL = EMPTY | ALLOW_VALUE;
184 static const int STATE_END = NON_EMPTY | NO_VALUES; 258 static const int STATE_END = NON_EMPTY | NO_VALUES;
185 259
186 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY | EMPTY | ALLOW_VALUE; 260 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY | EMPTY | ALLOW_VALUE;
187 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY | NON_EMPTY | NO_VALUES; 261 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY | NON_EMPTY | NO_VALUES;
188 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY | NON_EMPTY | ALLOW_VALUE; 262 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY | NON_EMPTY | ALLOW_VALUE;
189 263
(...skipping 29 matching lines...) Expand all
219 static const int CHAR_f = 0x66; 293 static const int CHAR_f = 0x66;
220 static const int CHAR_l = 0x6c; 294 static const int CHAR_l = 0x6c;
221 static const int CHAR_n = 0x6e; 295 static const int CHAR_n = 0x6e;
222 static const int CHAR_r = 0x72; 296 static const int CHAR_r = 0x72;
223 static const int CHAR_s = 0x73; 297 static const int CHAR_s = 0x73;
224 static const int CHAR_t = 0x74; 298 static const int CHAR_t = 0x74;
225 static const int CHAR_u = 0x75; 299 static const int CHAR_u = 0x75;
226 static const int LBRACE = 0x7b; 300 static const int LBRACE = 0x7b;
227 static const int RBRACE = 0x7d; 301 static const int RBRACE = 0x7d;
228 302
229 final String source; 303 // State of partial value at chunk split.
304 static const int NO_PARTIAL = 0;
305 static const int PARTIAL_STRING = 1;
306 static const int PARTIAL_NUMERAL = 2;
307 static const int PARTIAL_KEYWORD = 3;
308 static const int MASK_PARTIAL = 3;
309
310 // Partial states for numerals. Values can be |'ed with PARTIAL_NUMERAL.
311 static const int NUM_SIGN = 0; // After initial '-'.
312 static const int NUM_ZERO = 4; // After '0' as first digit.
313 static const int NUM_DIGIT = 8; // After digit, no '.' or 'e' seen.
314 static const int NUM_DOT = 12; // After '.'.
315 static const int NUM_DOT_DIGIT = 16; // After a decimal digit (after '.').
316 static const int NUM_E = 20; // After 'e' or 'E'.
317 static const int NUM_E_SIGN = 24; // After '-' or '+' after 'e' or 'E'.
318 static const int NUM_E_DIGIT = 28; // After exponent digit.
319 static const int NUM_SUCCESS = 32; // Never stored as partial state.
320
321 // Partial states for strings.
322 static const int STR_PLAIN = 0; // Inside string, but not escape.
323 static const int STR_ESCAPE = 4; // After '\'.
324 static const int STR_U = 16; // After '\u' and 0-3 hex digits.
325 static const int STR_U_COUNT_SHIFT = 2; // Hex digit count in bits 2-3.
326 static const int STR_U_VALUE_SHIFT = 5; // Hex digit value in bits 5+.
327
328 // Partial states for keywords.
329 static const int KWD_TYPE_MASK = 12;
330 static const int KWD_TYPE_SHIFT = 2;
331 static const int KWD_NULL = 0; // Prefix of "null" seen.
332 static const int KWD_TRUE = 4; // Prefix of "true" seen.
333 static const int KWD_FALSE = 8; // Prefix of "false" seen.
334 static const int KWD_COUNT_SHIFT = 4; // Prefix length in bits 4+.
335
336 // Mask used to mask off two lower bits.
337 static const int TWO_BIT_MASK = 3;
338
230 final _JsonListener listener; 339 final _JsonListener listener;
231 _JsonParser(this.source, this.listener); 340
232 341 // The current parsing state.
233 /** Parses [source], or throws if it fails. */ 342 int state = STATE_INITIAL;
234 void parse() { 343 List<int> states = <int>[];
235 final List<int> states = <int>[]; 344
236 int state = STATE_INITIAL; 345 /**
237 int position = 0; 346 * Stores tokenizer state between chunks.
238 int length = source.length; 347 *
348 * This state is stored when a chunk stops in the middle of a
349 * token (string, numeral, boolean or null).
350 *
351 * The partial state is used to continue parsing on the next chunk.
352 * The previous chunk is not retained, any data needed are stored in
353 * this integer, or in the [buffer] field as a string-building buffer
354 * or a [_NumberBuffer].
355 *
356 * Prefix state stored in [partialState] as bits.
357 *
358 * ..00 : No partial value (NO_PARTIAL).
359 *
360 * ..00001 : Partial string, not inside escape.
361 * ..00101 : Partial string, after '\'.
362 * ..vvvv1dd01 : Partial \u escape.
363 * The 'dd' bits (2-3) encode the number of hex digits seen.
364 * Bits 5-16 encode the value of the hex digits seen so far.
365 *
366 * ..0ddd10 : Partial numeral.
367 * The `ddd` bits store the parts of the numeral seen so
368 * far, as the constants `NUM_*` defined above.
369 * The characters of the numeral are stored in [buffer]
370 * as a [_NumberBuffer].
371 *
372 * ..0ddd0011 : Partial 'null' keyword.
373 * ..0ddd0111 : Partial 'true' keyword.
374 * ..0ddd1011 : Partial 'false' keyword.
375 * For all three keywords, the `ddd` bits encode the number
376 * of letters seen.
377 */
378 int partialState = NO_PARTIAL;
379
380 /**
381 * Extra data stored while parsing a primitive value.
382 * May be set during parsing, always set at chunk end if a value is partial.
383 *
384 * May contain a string buffer while parsing strings.
385 */
386 var buffer = null;
387
388 _ChunkedJsonParser(this.listener);
389
390 /**
391 * Push the current parse [state] on a stack.
392 *
393 * State is pushed when a new array or object literal starts,
394 * so the parser can go back to the correct value when the literal ends.
395 */
396 void saveState(int state) {
397 states.add(state);
398 }
399
400 /**
401 * Restore a state pushed with [saveState].
402 */
403 int restoreState() {
404 return states.removeLast(); // Throws if empty.
405 }
406
407 /**
408 * Finalizes the parsing.
409 *
410 * Throws if the source read so far doesn't end up with a complete
411 * parsed value. That means it must not be inside a list or object
412 * literal, and any partial value read should also be a valid complete
413 * value.
414 *
415 * The only valid partial state is a number that ends in a digit, and
416 * only if the number is the entire JSON value being parsed
417 * (otherwise it would be inside a list or object).
418 * Such a number will be completed. Any other partial state is an error.
419 */
420 void close() {
421 if (partialState != NO_PARTIAL) {
422 int partialType = partialState & MASK_PARTIAL;
423 if (partialType == PARTIAL_NUMERAL) {
424 int numState = partialState & ~MASK_PARTIAL;
425 // A partial number might be a valid number if we know it's done.
426 // There is an unnecessary overhead if input is a single number,
427 // but this is assumed to be rare.
428 _NumberBuffer buffer = this.buffer;
429 this.buffer = null;
430 finishChunkNumber(numState, 0, 0, buffer);
431 } else if (partialType == PARTIAL_STRING) {
432 fail(chunkEnd, "Unterminated string");
433 } else {
434 assert(partialType == PARTIAL_KEYWORD);
435 fail(chunkEnd); // Incomplete literal.
436 }
437 }
438 if (state != STATE_END) {
439 fail(chunkEnd);
440 }
441 }
442
443 /**
444 * Read out the result after successfully closing the parser.
445 *
446 * The parser is closed by calling [close] or calling [addSourceChunk] with
447 * `true` as second (`isLast`) argument.
448 */
449 Object get result {
450 return listener.result;
451 }
452
453 /** Sets the current source chunk. */
454 void set chunk(var source);
455
456 /**
457 * Length of current chunk.
458 *
459 * The valid arguments to [getChar] are 0 .. `chunkEnd - 1`.
460 */
461 int get chunkEnd;
462
463 /**
464 * Returns the chunk itself.
465 *
466 * Only used by [fail] to include the chunk in the thrown [FormatException].
467 */
468 get chunk;
469
470 /**
471 * Get character/code unit of current chunk.
472 *
473 * The [index] must be non-negative and less than `chunkEnd`.
474 * In practice, [index] will be no smaller than the `start` argument passed
475 * to [parse].
476 */
477 int getChar(int index);
478
479 /**
480 * Copy ASCII characters from start to end of chunk into a list.
481 *
482 * Used for number buffer (always copies ASCII, so encoding is not important).
483 */
484 void copyCharsToList(int start, int end, List<int> target);
485
486 /**
487 * Build a string using input code units.
488 *
489 * Creates a string buffer and enables adding characters and slices
490 * to that buffer.
491 * The buffer is stored in the [buffer] field. If the string is unterminated,
492 * the same buffer is used to continue parsing in the next chunk.
493 */
494 void beginString();
495 /**
496 * Add single character code to string being built.
497 *
498 * Used for unparsed escape sequences.
499 */
500 void addCharToString(int charCode);
501
502 /**
503 * Adds slice of current chunk to string being built.
504 *
505 * The [start] position is inclusive, [end] is exclusive.
506 */
507 void addSliceToString(int start, int end);
508
509 /** Finalizes the string being built and returns it as a String. */
510 String endString();
511
512 /**
513 * Extracts a literal string from a slice of the current chunk.
514 *
515 * No interpretation of the content is performed, except for converting
516 * the source format to string.
517 * This can be implemented more or less efficiently depending on the
518 * underlying source.
519 *
520 * This is used for string literals that contain no escapes.
521 */
522 String getString(int start, int end);
523
524 /**
525 * Parse a slice of the current chunk as an integer.
526 *
527 * The format is expected to be correct.
528 */
529 int parseInt(int start, int end) {
530 return int.parse(getString(start, end));
531 }
532
533 /**
534 * Parse a slice of the current chunk as a double.
535 *
536 * The format is expected to be correct.
537 * This is used by [parseNumber] when the double value cannot be
538 * built exactly during parsing.
539 */
540 double parseDouble(int start, int end) {
541 return double.parse(getString(start, end));
542 }
543
544 /**
545 * Create a _NumberBuffer containing the digits from [start] to [chunkEnd].
546 *
547 * This creates a number buffer and initializes it with the part of the
548 * number literal ending the current chunk.
549 */
550 void createNumberBuffer(int start) {
551 assert(start >= 0);
552 assert(start < chunkEnd);
553 int length = chunkEnd - start;
554 var buffer = new _NumberBuffer(length);
555 copyCharsToList(start, chunkEnd, buffer.list);
556 buffer.length = length;
557 return buffer;
558 }
559
560 /**
561 * Continues parsing a partial value.
562 */
563 int parsePartial(int position) {
564 if (position == chunkEnd) return position;
565 int partialState = this.partialState;
566 assert(partialState != NO_PARTIAL);
567 int partialType = partialState & MASK_PARTIAL;
568 this.partialState = NO_PARTIAL;
569 partialState = partialState & ~MASK_PARTIAL;
570 assert(partialType != 0);
571 if (partialType == PARTIAL_STRING) {
572 position = parsePartialString(position, partialState);
573 } else if (partialType == PARTIAL_NUMERAL) {
574 position = parsePartialNumber(position, partialState);
575 } else if (partialType == PARTIAL_KEYWORD) {
576 position = parsePartialKeyword(position, partialState);
577 }
578 return position;
579 }
580
581 /**
582 * Parses the remainder of a number into the number buffer.
583 *
584 * Syntax is checked while parsing.
585 * Starts at position, which is expected to be the start of the chunk,
586 * and returns the index of the first non-number-literal character found,
587 * or chunkEnd if the entire chunk is a valid number continuation.
588 * Throws if a syntax error is detected.
589 */
590 int parsePartialNumber(int position, int state) {
591 int start = position;
592 // Primitive implementation, can be optimized.
593 _NumberBuffer buffer = this.buffer;
594 this.buffer = null;
595 int end = chunkEnd;
596 toBailout: {
597 if (position == end) break toBailout;
598 int char = getChar(position);
599 int digit = char ^ CHAR_0;
600 if (state == NUM_SIGN) {
601 if (digit <= 9) {
602 if (digit == 0) {
603 state = NUM_ZERO;
604 } else {
605 state = NUM_DIGIT;
606 }
607 position++;
608 if (position == end) break toBailout;
609 char = getChar(position);
610 digit = char ^ CHAR_0;
611 } else {
612 return fail(position);
613 }
614 }
615 if (state == NUM_ZERO) {
616 // JSON does not allow insignificant leading zeros (e.g., "09").
617 if (digit <= 9) return fail(position);
618 state = NUM_DIGIT;
619 }
620 while (state == NUM_DIGIT) {
621 if (digit > 9) {
622 if (char == DECIMALPOINT) {
623 state = NUM_DOT;
624 } else if ((char | 0x20) == CHAR_e) {
625 state = NUM_E;
626 } else {
627 finishChunkNumber(state, start, position, buffer);
628 return position;
629 }
630 }
631 position++;
632 if (position == end) break toBailout;
633 char = getChar(position);
634 digit = char ^ CHAR_0;
635 }
636 if (state == NUM_DOT) {
637 if (digit > 9) return fail(position);
638 state = NUM_DOT_DIGIT;
639 }
640 while (state == NUM_DOT_DIGIT) {
641 if (digit > 9) {
642 if ((char | 0x20) == CHAR_e) {
643 state = NUM_E;
644 } else {
645 finishChunkNumber(state, start, position, buffer);
646 return position;
647 }
648 }
649 position++;
650 if (position == end) break toBailout;
651 char = getChar(position);
652 digit = char ^ CHAR_0;
653 }
654 if (state == NUM_E) {
655 if (char == PLUS || char == MINUS) {
656 state = NUM_E_SIGN;
657 position++;
658 if (position == end) break toBailout;
659 char = getChar(position);
660 digit = char ^ CHAR_0;
661 }
662 }
663 assert(state >= NUM_E);
664 while (digit <= 9) {
665 state = NUM_E_DIGIT;
666 position++;
667 if (position == end) break toBailout;
668 char = getChar(position);
669 digit = char ^ CHAR_0;
670 }
671 finishChunkNumber(state, start, position, buffer);
672 return position;
673 }
674 // Bailout code in case the current chunk ends while parsing the numeral.
675 assert(position == end);
676 continueChunkNumber(state, start, buffer);
677 return chunkEnd;
678 }
679
680 /**
681 * Continues parsing a partial string literal.
682 *
683 * Handles partial escapes and then hands the parsing off to
684 * [parseStringToBuffer].
685 */
686 int parsePartialString(int position, int partialState) {
687 if (partialState == STR_PLAIN) {
688 return parseStringToBuffer(position);
689 }
690 if (partialState == STR_ESCAPE) {
691 position = parseStringEscape(position);
692 // parseStringEscape sets partialState if it sees the end.
693 if (position == chunkEnd) return position;
694 return parseStringToBuffer(position);
695 }
696 assert((partialState & STR_U) != 0);
697 int value = partialState >> STR_U_VALUE_SHIFT;
698 int count = (partialState >> STR_U_COUNT_SHIFT) & TWO_BIT_MASK;
699 for (int i = count; i < 4; i++, position++) {
700 if (position == chunkEnd) return chunkStringEscapeU(i, value);
701 int char = getChar(position);
702 int digit = parseHexDigit(char);
703 if (digit < 0) fail(position, "Invalid hex digit");
704 value = 16 * value + digit;
705 }
706 addCharToString(value);
707 return parseStringToBuffer(position);
708 }
709
710 /**
711 * Continues parsing a partial keyword.
712 */
713 int parsePartialKeyword(int position, int partialState) {
714 int keywordType = partialState & KWD_TYPE_MASK;
715 int count = partialState >> KWD_COUNT_SHIFT;
716 int keywordTypeIndex = keywordType >> KWD_TYPE_SHIFT;
717 String keyword = const ["null", "true", "false"][keywordTypeIndex];
718 assert(count < keyword.length);
719 do {
720 if (position == chunkEnd) {
721 this.partialState =
722 PARTIAL_KEYWORD | keywordType | (count << KWD_COUNT_SHIFT);
723 return chunkEnd;
724 }
725 int expectedChar = keyword.codeUnitAt(count);
726 if (getChar(position) != expectedChar) return fail(position);
727 position++;
728 count++;
729 } while (count < keyword.length);
730 if (keywordType == KWD_NULL) {
731 listener.handleNull();
732 } else {
733 listener.handleBool(keywordType == KWD_TRUE);
734 }
735 return position;
736 }
737
738 /** Convert hex-digit to its value. Returns -1 if char is not a hex digit. */
739 int parseHexDigit(int char) {
740 int digit = char ^ 0x30;
741 if (digit <= 9) return digit;
742 int letter = (char | 0x20) ^ 0x60;
743 // values 1 .. 6 are 'a' through 'f'
744 if (letter <= 6 && letter > 0) return letter + 9;
745 return -1;
746 }
747
748 /**
749 * Parses the current chunk as a chunk of JSON.
750 *
751 * Starts parsing at [position] and continues until [chunkEnd].
752 * Continues parsing where the previous chunk (if any) ended.
753 */
754 void parse(int position) {
755 int length = chunkEnd;
756 if (partialState != NO_PARTIAL) {
757 position = parsePartial(position);
758 if (position == length) return;
759 }
760 int state = this.state;
239 while (position < length) { 761 while (position < length) {
240 int char = source.codeUnitAt(position); 762 int char = getChar(position);
241 switch (char) { 763 switch (char) {
242 case SPACE: 764 case SPACE:
243 case CARRIAGE_RETURN: 765 case CARRIAGE_RETURN:
244 case NEWLINE: 766 case NEWLINE:
245 case TAB: 767 case TAB:
246 position++; 768 position++;
247 break; 769 break;
248 case QUOTE: 770 case QUOTE:
249 if ((state & ALLOW_STRING_MASK) != 0) fail(position); 771 if ((state & ALLOW_STRING_MASK) != 0) return fail(position);
772 state |= VALUE_READ_BITS;
250 position = parseString(position + 1); 773 position = parseString(position + 1);
251 state |= VALUE_READ_BITS;
252 break; 774 break;
253 case LBRACKET: 775 case LBRACKET:
254 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 776 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
255 listener.beginArray(); 777 listener.beginArray();
256 states.add(state); 778 saveState(state);
257 state = STATE_ARRAY_EMPTY; 779 state = STATE_ARRAY_EMPTY;
258 position++; 780 position++;
259 break; 781 break;
260 case LBRACE: 782 case LBRACE:
261 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 783 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
262 listener.beginObject(); 784 listener.beginObject();
263 states.add(state); 785 saveState(state);
264 state = STATE_OBJECT_EMPTY; 786 state = STATE_OBJECT_EMPTY;
265 position++; 787 position++;
266 break; 788 break;
267 case CHAR_n: 789 case CHAR_n:
268 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 790 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
791 state |= VALUE_READ_BITS;
269 position = parseNull(position); 792 position = parseNull(position);
270 state |= VALUE_READ_BITS;
271 break; 793 break;
272 case CHAR_f: 794 case CHAR_f:
273 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 795 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
796 state |= VALUE_READ_BITS;
274 position = parseFalse(position); 797 position = parseFalse(position);
275 state |= VALUE_READ_BITS;
276 break; 798 break;
277 case CHAR_t: 799 case CHAR_t:
278 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 800 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
801 state |= VALUE_READ_BITS;
279 position = parseTrue(position); 802 position = parseTrue(position);
280 state |= VALUE_READ_BITS;
281 break; 803 break;
282 case COLON: 804 case COLON:
283 if (state != STATE_OBJECT_KEY) fail(position); 805 if (state != STATE_OBJECT_KEY) return fail(position);
284 listener.propertyName(); 806 listener.propertyName();
285 state = STATE_OBJECT_COLON; 807 state = STATE_OBJECT_COLON;
286 position++; 808 position++;
287 break; 809 break;
288 case COMMA: 810 case COMMA:
289 if (state == STATE_OBJECT_VALUE) { 811 if (state == STATE_OBJECT_VALUE) {
290 listener.propertyValue(); 812 listener.propertyValue();
291 state = STATE_OBJECT_COMMA; 813 state = STATE_OBJECT_COMMA;
292 position++; 814 position++;
293 } else if (state == STATE_ARRAY_VALUE) { 815 } else if (state == STATE_ARRAY_VALUE) {
294 listener.arrayElement(); 816 listener.arrayElement();
295 state = STATE_ARRAY_COMMA; 817 state = STATE_ARRAY_COMMA;
296 position++; 818 position++;
297 } else { 819 } else {
298 fail(position); 820 return fail(position);
299 } 821 }
300 break; 822 break;
301 case RBRACKET: 823 case RBRACKET:
302 if (state == STATE_ARRAY_EMPTY) { 824 if (state == STATE_ARRAY_EMPTY) {
303 listener.endArray(); 825 listener.endArray();
304 } else if (state == STATE_ARRAY_VALUE) { 826 } else if (state == STATE_ARRAY_VALUE) {
305 listener.arrayElement(); 827 listener.arrayElement();
306 listener.endArray(); 828 listener.endArray();
307 } else { 829 } else {
308 fail(position); 830 return fail(position);
309 } 831 }
310 state = states.removeLast() | VALUE_READ_BITS; 832 state = restoreState() | VALUE_READ_BITS;
311 position++; 833 position++;
312 break; 834 break;
313 case RBRACE: 835 case RBRACE:
314 if (state == STATE_OBJECT_EMPTY) { 836 if (state == STATE_OBJECT_EMPTY) {
315 listener.endObject(); 837 listener.endObject();
316 } else if (state == STATE_OBJECT_VALUE) { 838 } else if (state == STATE_OBJECT_VALUE) {
317 listener.propertyValue(); 839 listener.propertyValue();
318 listener.endObject(); 840 listener.endObject();
319 } else { 841 } else {
320 fail(position); 842 return fail(position);
321 } 843 }
322 state = states.removeLast() | VALUE_READ_BITS; 844 state = restoreState() | VALUE_READ_BITS;
323 position++; 845 position++;
324 break; 846 break;
325 default: 847 default:
326 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 848 if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
849 state |= VALUE_READ_BITS;
327 position = parseNumber(char, position); 850 position = parseNumber(char, position);
328 state |= VALUE_READ_BITS;
329 break; 851 break;
330 } 852 }
331 } 853 }
332 if (state != STATE_END) fail(position); 854 this.state = state;
333 } 855 }
334 856
335 /** 857 /**
336 * Parses a "true" literal starting at [position]. 858 * Parses a "true" literal starting at [position].
337 * 859 *
338 * [:source[position]:] must be "t". 860 * [:source[position]:] must be "t".
339 */ 861 */
340 int parseTrue(int position) { 862 int parseTrue(int position) {
341 assert(source.codeUnitAt(position) == CHAR_t); 863 assert(getChar(position) == CHAR_t);
342 if (source.length < position + 4) fail(position, "Unexpected identifier"); 864 if (chunkEnd < position + 4) {
343 if (source.codeUnitAt(position + 1) != CHAR_r || 865 return parseKeywordPrefix(position, "true", KWD_TRUE);
344 source.codeUnitAt(position + 2) != CHAR_u || 866 }
345 source.codeUnitAt(position + 3) != CHAR_e) { 867 if (getChar(position + 1) != CHAR_r ||
346 fail(position); 868 getChar(position + 2) != CHAR_u ||
869 getChar(position + 3) != CHAR_e) {
870 return fail(position);
347 } 871 }
348 listener.handleBool(true); 872 listener.handleBool(true);
349 return position + 4; 873 return position + 4;
350 } 874 }
351 875
352 /** 876 /**
353 * Parses a "false" literal starting at [position]. 877 * Parses a "false" literal starting at [position].
354 * 878 *
355 * [:source[position]:] must be "f". 879 * [:source[position]:] must be "f".
356 */ 880 */
357 int parseFalse(int position) { 881 int parseFalse(int position) {
358 assert(source.codeUnitAt(position) == CHAR_f); 882 assert(getChar(position) == CHAR_f);
359 if (source.length < position + 5) fail(position, "Unexpected identifier"); 883 if (chunkEnd < position + 5) {
360 if (source.codeUnitAt(position + 1) != CHAR_a || 884 return parseKeywordPrefix(position, "false", KWD_FALSE);
361 source.codeUnitAt(position + 2) != CHAR_l || 885 }
362 source.codeUnitAt(position + 3) != CHAR_s || 886 if (getChar(position + 1) != CHAR_a ||
363 source.codeUnitAt(position + 4) != CHAR_e) { 887 getChar(position + 2) != CHAR_l ||
364 fail(position); 888 getChar(position + 3) != CHAR_s ||
889 getChar(position + 4) != CHAR_e) {
890 return fail(position);
365 } 891 }
366 listener.handleBool(false); 892 listener.handleBool(false);
367 return position + 5; 893 return position + 5;
368 } 894 }
369 895
370 /** 896 /**
371 * Parses a "null" literal starting at [position]. 897 * Parses a "null" literal starting at [position].
372 * 898 *
373 * [:source[position]:] must be "n". 899 * [:source[position]:] must be "n".
374 */ 900 */
375 int parseNull(int position) { 901 int parseNull(int position) {
376 assert(source.codeUnitAt(position) == CHAR_n); 902 assert(getChar(position) == CHAR_n);
377 if (source.length < position + 4) fail(position, "Unexpected identifier"); 903 if (chunkEnd < position + 4) {
378 if (source.codeUnitAt(position + 1) != CHAR_u || 904 return parseKeywordPrefix(position, "null", KWD_NULL);
379 source.codeUnitAt(position + 2) != CHAR_l || 905 }
380 source.codeUnitAt(position + 3) != CHAR_l) { 906 if (getChar(position + 1) != CHAR_u ||
381 fail(position); 907 getChar(position + 2) != CHAR_l ||
908 getChar(position + 3) != CHAR_l) {
909 return fail(position);
382 } 910 }
383 listener.handleNull(); 911 listener.handleNull();
384 return position + 4; 912 return position + 4;
385 } 913 }
386 914
915 int parseKeywordPrefix(int position, String chars, int type) {
916 assert(getChar(position) == chars.codeUnitAt(0));
917 int length = chunkEnd;
918 int start = position;
919 int count = 1;
920 while (++position < length) {
921 int char = getChar(position);
922 if (char != chars.codeUnitAt(count)) return fail(start);
923 count++;
924 }
925 this.partialState = PARTIAL_KEYWORD | type | (count << KWD_COUNT_SHIFT);
926 return length;
927 }
928
387 /** 929 /**
388 * Parses a string value. 930 * Parses a string value.
389 * 931 *
390 * Initial [position] is right after the initial quote. 932 * Initial [position] is right after the initial quote.
391 * Returned position right after the final quote. 933 * Returned position right after the final quote.
392 */ 934 */
393 int parseString(int position) { 935 int parseString(int position) {
394 // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"' 936 // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"'
395 // Initial position is right after first '"'. 937 // Initial position is right after first '"'.
396 int start = position; 938 int start = position;
397 while (position < source.length) { 939 int end = chunkEnd;
398 int char = source.codeUnitAt(position++); 940 while (position < end) {
941 int char = getChar(position++);
399 // BACKSLASH is larger than QUOTE and SPACE. 942 // BACKSLASH is larger than QUOTE and SPACE.
400 if (char > BACKSLASH) { 943 if (char > BACKSLASH) {
401 continue; 944 continue;
402 } 945 }
403 if (char == BACKSLASH) { 946 if (char == BACKSLASH) {
404 return parseStringWithEscapes(start, position - 1); 947 beginString();
948 addSliceToString(start, position - 1);
949 return parseStringToBuffer(position - 1);
405 } 950 }
406 if (char == QUOTE) { 951 if (char == QUOTE) {
407 listener.handleString(source.substring(start, position - 1)); 952 listener.handleString(getString(start, position - 1));
408 return position; 953 return position;
409 } 954 }
410 if (char < SPACE) { 955 if (char < SPACE) {
411 fail(position - 1, "Control character in string"); 956 fail(position - 1, "Control character in string");
412 } 957 }
413 } 958 }
414 fail(start - 1, "Unterminated string"); 959 beginString();
415 } 960 addSliceToString(start, end);
416 961 return chunkString(STR_PLAIN);
417 int parseStringWithEscapes(start, position) { 962 }
418 // Backslash escape detected. Collect character codes for rest of string. 963
419 int firstEscape = position; 964 /**
420 List<int> chars = <int>[]; 965 * Sets up a partial string state.
421 for (int i = start; i < firstEscape; i++) { 966 *
422 chars.add(source.codeUnitAt(i)); 967 * The state is either not inside an escape, or right after a backslash.
423 } 968 * For partial strings ending inside a Unicode escape, use
424 position++; 969 * [chunkStringEscapeU].
970 */
971 int chunkString(int stringState) {
972 partialState = PARTIAL_STRING | stringState;
973 return chunkEnd;
974 }
975
976 /**
977 * Sets up a partial string state for a partially parsed Unicode escape.
978 *
979 * The partial string state includes the current [buffer] and the
980 * number of hex digits of the Unicode escape seen so far (e.g., for `"\u30`)
981 * the state knows that two digits have been seen, and what their value is.
982 *
983 * Returns [chunkEnd] so it can be used as part of a return statement.
984 */
985 int chunkStringEscapeU(int count, int value) {
986 partialState = PARTIAL_STRING | STR_U |
987 (count << STR_U_COUNT_SHIFT) |
988 (value << STR_U_VALUE_SHIFT);
989 return chunkEnd;
990 }
991
992 /**
993 * Parses the remainder of a string literal into a buffer.
994 *
995 * The buffer is stored in [buffer] and its underlying format depends on
996 * the input chunk type. For example UTF-8 decoding happens in the
997 * buffer, not in the parser, since all significant JSON characters are ASCII.
998 *
999 * This function scans through the string literal for escapes, and copies
1000 * slices of non-escape characters using [addSliceToString].
1001 */
1002 int parseStringToBuffer(position) {
1003 int end = chunkEnd;
1004 int start = position;
425 while (true) { 1005 while (true) {
426 if (position == source.length) { 1006 if (position == end) {
427 fail(start - 1, "Unterminated string"); 1007 if (position > start) {
428 } 1008 addSliceToString(start, position);
429 int char = source.codeUnitAt(position); 1009 }
430 switch (char) { 1010 return chunkString(STR_PLAIN);
431 case CHAR_b: char = BACKSPACE; break; 1011 }
432 case CHAR_f: char = FORM_FEED; break; 1012 int char = getChar(position++);
433 case CHAR_n: char = NEWLINE; break; 1013 if (char > BACKSLASH) continue;
434 case CHAR_r: char = CARRIAGE_RETURN; break; 1014 if (char < SPACE) {
435 case CHAR_t: char = TAB; break; 1015 fail(position - 1); // Control character in string.
436 case SLASH: 1016 return;
437 case BACKSLASH: 1017 }
438 case QUOTE: 1018 if (char == QUOTE) {
439 break; 1019 int quotePosition = position - 1;
440 case CHAR_u: 1020 if (quotePosition > start) {
441 int hexStart = position - 1; 1021 addSliceToString(start, quotePosition);
442 int value = 0; 1022 }
443 for (int i = 0; i < 4; i++) { 1023 listener.handleString(endString());
444 position++; 1024 return position;
445 if (position == source.length) { 1025 }
446 fail(start - 1, "Unterminated string"); 1026 if (char != BACKSLASH) {
1027 continue;
1028 }
1029 // Handle escape.
1030 if (position - 1 > start) {
1031 addSliceToString(start, position - 1);
1032 }
1033 if (position == end) return chunkString(STR_ESCAPE);
1034 position = parseStringEscape(position);
1035 if (position == end) return position;
1036 start = position;
1037 }
1038 return -1; // UNREACHABLE.
1039 }
1040
1041 /**
1042 * Parse a string escape.
1043 *
1044 * Position is right after the initial backslash.
1045 * The following escape is parsed into a character code which is added to
1046 * the current string buffer using [addCharToString].
1047 *
1048 * Returns position after the last character of the escape.
1049 */
1050 int parseStringEscape(int position) {
1051 int char = getChar(position++);
1052 int length = chunkEnd;
1053 switch (char) {
1054 case CHAR_b: char = BACKSPACE; break;
1055 case CHAR_f: char = FORM_FEED; break;
1056 case CHAR_n: char = NEWLINE; break;
1057 case CHAR_r: char = CARRIAGE_RETURN; break;
1058 case CHAR_t: char = TAB; break;
1059 case SLASH:
1060 case BACKSLASH:
1061 case QUOTE:
1062 break;
1063 case CHAR_u:
1064 int hexStart = position - 1;
1065 int value = 0;
1066 for (int i = 0; i < 4; i++) {
1067 if (position == length) return chunkStringEscapeU(i, value);
1068 char = getChar(position++);
1069 int digit = char ^ 0x30;
1070 value *= 16;
1071 if (digit <= 9) {
1072 value += digit;
1073 } else {
1074 digit = (char | 0x20) - CHAR_a;
1075 if (digit < 0 || digit > 5) {
1076 return fail(hexStart, "Invalid unicode escape");
447 } 1077 }
448 char = source.codeUnitAt(position); 1078 value += digit + 10;
449 char -= 0x30;
450 if (char < 0) fail(hexStart, "Invalid unicode escape");
451 if (char < 10) {
452 value = value * 16 + char;
453 } else {
454 char = (char | 0x20) - 0x31;
455 if (char < 0 || char > 5) {
456 fail(hexStart, "Invalid unicode escape");
457 }
458 value = value * 16 + char + 10;
459 }
460 } 1079 }
461 char = value;
462 break;
463 default:
464 if (char < SPACE) fail(position, "Control character in string");
465 fail(position, "Unrecognized string escape");
466 }
467 do {
468 chars.add(char);
469 position++;
470 if (position == source.length) fail(start - 1, "Unterminated string");
471 char = source.codeUnitAt(position);
472 if (char == QUOTE) {
473 String result = new String.fromCharCodes(chars);
474 listener.handleString(result);
475 return position + 1;
476 } 1080 }
477 if (char < SPACE) { 1081 char = value;
478 fail(position, "Control character in string"); 1082 break;
479 } 1083 default:
480 } while (char != BACKSLASH); 1084 if (char < SPACE) return fail(position, "Control character in string");
481 position++; 1085 return fail(position, "Unrecognized string escape");
482 } 1086 }
1087 addCharToString(char);
1088 if (position == length) return chunkString(STR_PLAIN);
1089 return position;
1090 }
1091
1092 /// Sets up a partial numeral state.
1093 /// Returns chunkEnd to allow easy one-line bailout tests.
1094 int beginChunkNumber(int state, int start) {
1095 int end = chunkEnd;
1096 int length = end - start;
1097 var buffer = new _NumberBuffer(length);
1098 copyCharsToList(start, end, buffer.list, 0);
1099 buffer.length = length;
1100 this.buffer = buffer;
1101 this.partialState = PARTIAL_NUMERAL | state;
1102 return end;
1103 }
1104
1105 void addNumberChunk(_NumberBuffer buffer, int start, int end, int overhead) {
1106 int length = end - start;
1107 int count = buffer.length;
1108 int newCount = count + length;
1109 int newCapacity = newCount + overhead;
1110 buffer.ensureCapacity(newCapacity);
1111 copyCharsToList(start, end, buffer.list, count);
1112 buffer.length = newCount;
1113 }
1114
1115 // Continues an already chunked number across an entire chunk.
1116 int continueChunkNumber(int state, int start, _NumberBuffer buffer) {
1117 int end = chunkEnd;
1118 addNumberChunk(buffer, start, end, _NumberBuffer.kDefaultOverhead);
1119 this.buffer = buffer;
1120 this.partialState = PARTIAL_NUMERAL | state;
1121 return end;
1122 }
1123
1124 int finishChunkNumber(int state, int start, int end, _NumberBuffer buffer) {
1125 if (state == NUM_ZERO) {
1126 listener.handleNumber(0);
1127 return;
1128 }
1129 if (end > start) {
1130 addNumberChunk(buffer, start, end, 0);
1131 }
1132 if (state == NUM_DIGIT) {
1133 listener.handleNumber(buffer.parseInt());
1134 } else if (state == NUM_DOT_DIGIT || state == NUM_E_DIGIT) {
1135 listener.handleNumber(buffer.parseDouble());
1136 } else {
1137 fail(chunkEnd, "Unterminated number literal");
1138 }
1139 return end;
483 } 1140 }
484 1141
485 int parseNumber(int char, int position) { 1142 int parseNumber(int char, int position) {
486 // Also called on any unexpected character. 1143 // Also called on any unexpected character.
487 // Format: 1144 // Format:
488 // '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)? 1145 // '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)?
489 int start = position; 1146 int start = position;
490 int length = source.length; 1147 int length = chunkEnd;
491 int intValue = 0; // Collect int value while parsing. 1148 // Collects an int value while parsing. Used for both an integer literal,
492 int intSign = 1; 1149 // and the exponent part of a double literal.
1150 int intValue = 0;
1151 double doubleValue = 0.0; // Collect double value while parsing.
1152 int sign = 1;
493 bool isDouble = false; 1153 bool isDouble = false;
494 // Break this block when the end of the number literal is reached. 1154 // Break this block when the end of the number literal is reached.
495 // At that time, position points to the next character, and isDouble 1155 // At that time, position points to the next character, and isDouble
496 // is set if the literal contains a decimal point or an exponential. 1156 // is set if the literal contains a decimal point or an exponential.
497 parsing: { 1157 parsing: {
498 if (char == MINUS) { 1158 if (char == MINUS) {
499 intSign = -1; 1159 sign = -1;
500 position++; 1160 position++;
501 if (position == length) fail(position, "Missing expected digit"); 1161 if (position == length) return beginChunkNumber(NUM_SIGN, start);
502 char = source.codeUnitAt(position); 1162 char = getChar(position);
503 } 1163 }
504 if (char < CHAR_0 || char > CHAR_9) { 1164 int digit = char ^ CHAR_0;
505 if (intSign < 0) { 1165 if (digit > 9) {
1166 if (sign < 0) {
506 fail(position, "Missing expected digit"); 1167 fail(position, "Missing expected digit");
507 } else { 1168 } else {
508 // If it doesn't even start out as a numeral. 1169 // If it doesn't even start out as a numeral.
509 fail(position, "Unexpected character"); 1170 fail(position, "Unexpected character");
510 } 1171 }
511 } 1172 }
512 if (char == CHAR_0) { 1173 if (digit == 0) {
513 position++; 1174 position++;
514 if (position == length) break parsing; 1175 if (position == length) return beginChunkNumber(NUM_ZERO, start);
515 char = source.codeUnitAt(position); 1176 char = getChar(position);
516 if (CHAR_0 <= char && char <= CHAR_9) { 1177 digit = char ^ CHAR_0;
517 fail(position); 1178 // If starting with zero, next character must not be digit.
518 } 1179 if (digit <= 9) fail(position);
519 } else { 1180 } else {
520 do { 1181 do {
521 intValue = intValue * 10 + (char - CHAR_0); 1182 intValue = 10 * intValue + digit;
522 position++; 1183 position++;
523 if (position == length) break parsing; 1184 if (position == length) return beginChunkNumber(NUM_DIGIT, start);
524 char = source.codeUnitAt(position); 1185 char = getChar(position);
525 } while (CHAR_0 <= char && char <= CHAR_9); 1186 digit = char ^ CHAR_0;
1187 } while (digit <= 9);
526 } 1188 }
527 if (char == DECIMALPOINT) { 1189 if (char == DECIMALPOINT) {
528 isDouble = true; 1190 isDouble = true;
1191 doubleValue = intValue.toDouble();
1192 intValue = 0;
529 position++; 1193 position++;
530 if (position == length) fail(position, "Missing expected digit"); 1194 if (position == length) return beginChunkNumber(NUM_DOT, start);
531 char = source.codeUnitAt(position); 1195 char = getChar(position);
532 if (char < CHAR_0 || char > CHAR_9) fail(position); 1196 digit = char ^ CHAR_0;
1197 if (digit > 9) fail(position);
533 do { 1198 do {
1199 doubleValue = 10.0 * doubleValue + digit;
1200 intValue -= 1;
534 position++; 1201 position++;
535 if (position == length) break parsing; 1202 if (position == length) return beginChunkNumber(NUM_DOT_DIGIT, start);
536 char = source.codeUnitAt(position); 1203 char = getChar(position);
537 } while (CHAR_0 <= char && char <= CHAR_9); 1204 digit = char ^ CHAR_0;
538 } 1205 } while (digit <= 9);
539 if (char == CHAR_e || char == CHAR_E) { 1206 }
540 isDouble = true; 1207 if ((char | 0x20) == CHAR_e) {
1208 if (!isDouble) {
1209 doubleValue = intValue.toDouble();
1210 intValue = 0;
1211 isDouble = true;
1212 }
541 position++; 1213 position++;
542 if (position == length) fail(position, "Missing expected digit"); 1214 if (position == length) return beginChunkNumber(NUM_E, start);
543 char = source.codeUnitAt(position); 1215 char = getChar(position);
1216 int expSign = 1;
1217 int exponent = 0;
544 if (char == PLUS || char == MINUS) { 1218 if (char == PLUS || char == MINUS) {
1219 expSign = 0x2C - char; // -1 for MINUS, +1 for PLUS
545 position++; 1220 position++;
546 if (position == length) fail(position, "Missing expected digit"); 1221 if (position == length) return beginChunkNumber(NUM_E_SIGN, start);
547 char = source.codeUnitAt(position); 1222 char = getChar(position);
548 } 1223 }
549 if (char < CHAR_0 || char > CHAR_9) { 1224 digit = char ^ CHAR_0;
1225 if (digit > 9) {
550 fail(position, "Missing expected digit"); 1226 fail(position, "Missing expected digit");
551 } 1227 }
552 do { 1228 do {
1229 exponent = 10 * exponent + digit;
553 position++; 1230 position++;
554 if (position == length) break parsing; 1231 if (position == length) return beginChunkNumber(NUM_E_DIGIT, start);
555 char = source.codeUnitAt(position); 1232 char = getChar(position);
556 } while (CHAR_0 <= char && char <= CHAR_9); 1233 digit = char ^ CHAR_0;
1234 } while (digit <= 9);
1235 intValue += expSign * exponent;
557 } 1236 }
558 } 1237 }
559 if (!isDouble) { 1238 if (!isDouble) {
560 listener.handleNumber(intSign * intValue); 1239 listener.handleNumber(sign * intValue);
561 return position; 1240 return position;
562 } 1241 }
563 // This correctly creates -0.0 for doubles. 1242 // Double values at or above this value (2**53) may have lost precision.
564 listener.handleNumber(_parseDouble(source, start, position)); 1243 // Only trust results that are below this value.
1244 const double maxExactDouble = 9007199254740992.0;
1245 if (doubleValue < maxExactDouble) {
1246 int exponent = intValue;
1247 double signedMantissa = doubleValue * sign;
1248 if (exponent >= -22) {
1249 if (exponent < 0) {
1250 listener.handleNumber(signedMantissa / POWERS_OF_TEN[-exponent]);
1251 return position;
1252 }
1253 if (exponent == 0) {
1254 listener.handleNumber(signedMantissa);
1255 return position;
1256 }
1257 if (exponent <= 22) {
1258 listener.handleNumber(signedMantissa * POWERS_OF_TEN[exponent]);
1259 return position;
1260 }
1261 }
1262 }
1263 // If the value is outside the range +/-maxExactDouble or
1264 // exponent is outside the range +/-22, then we can't trust simple double
1265 // arithmetic to get the exact result, so we use the system double parsing.
1266 listener.handleNumber(parseDouble(start, position));
565 return position; 1267 return position;
566 } 1268 }
567 1269
568 static double _parseDouble(String source, int start, int end) 1270 int fail(int position, [String message]) {
569 native "Double_parse"; 1271 if (message == null) {
570 1272 message = "Unexpected character";
571 void fail(int position, [String message]) { 1273 if (position == chunkEnd) message = "Unexpected end of input";
572 if (message == null) message = "Unexpected character"; 1274 }
573 throw new FormatException(message, source, position); 1275 throw new FormatException(message, chunk, position);
574 } 1276 }
575 } 1277 }
1278
1279 /**
1280 * Chunked JSON parser that parses [String] chunks.
1281 */
1282 class _JsonStringParser extends _ChunkedJsonParser {
1283 String chunk;
1284 int chunkEnd;
1285
1286 _JsonStringParser(_JsonListener listener) : super(listener);
1287
1288 int getChar(int position) => chunk.codeUnitAt(position);
1289
1290 String getString(int start, int end) {
1291 return chunk.substring(start, end);
1292 }
1293
1294 void beginString() {
1295 this.buffer = new StringBuffer();
1296 }
1297
1298 void addSliceToString(int start, int end) {
1299 StringBuffer buffer = this.buffer;
1300 buffer.write(chunk.substring(start, end));
1301 }
1302
1303 void addCharToString(int charCode) {
1304 StringBuffer buffer = this.buffer;
1305 buffer.writeCharCode(charCode);
1306 }
1307
1308 String endString() {
1309 StringBuffer buffer = this.buffer;
1310 this.buffer = null;
1311 return buffer.toString();
1312 }
1313
1314 void copyCharsToList(int start, int end, List target, int offset) {
1315 int length = end - start;
1316 for (int i = 0; i < length; i++) {
1317 target[offset + i] = chunk.codeUnitAt(start + i);
1318 }
1319 }
1320
1321 double parseDouble(int start, int end) {
1322 return _parseDouble(chunk, start, end);
1323 }
1324 }
1325
1326 patch class JsonDecoder {
1327 /* patch */ StringConversionSink startChunkedConversion(Sink<Object> sink) {
1328 return new _JsonStringDecoderSink(this._reviver, sink);
1329 }
1330 }
1331
1332 /**
1333 * Implements the chunked conversion from a JSON string to its corresponding
1334 * object.
1335 *
1336 * The sink only creates one object, but its input can be chunked.
1337 */
1338 class _JsonStringDecoderSink extends StringConversionSinkBase {
1339 _ChunkedJsonParser _parser;
1340 Function _reviver;
1341 final Sink<Object> _sink;
1342
1343 _JsonStringDecoderSink(reviver, this._sink)
1344 : _reviver = reviver, _parser = _createParser(reviver);
1345
1346 static _ChunkedJsonParser _createParser(reviver) {
1347 _BuildJsonListener listener;
1348 if (reviver == null) {
1349 listener = new _BuildJsonListener();
1350 } else {
1351 listener = new _ReviverJsonListener(reviver);
1352 }
1353 return new _JsonStringParser(listener);
1354 }
1355
1356 void addSlice(String chunk, int start, int end, bool isLast) {
1357 _parser.chunk = chunk;
1358 _parser.chunkEnd = end;
1359 _parser.parse(start);
1360 if (isLast) _parser.close();
1361 }
1362
1363 void add(String chunk) {
1364 addSlice(chunk, 0, chunk.length, false);
1365 }
1366
1367 void close() {
1368 _parser.close();
1369 var decoded = _parser.result;
1370 _sink.add(decoded);
1371 _sink.close();
1372 }
1373
1374 Utf8ConversionSink asUtf8Sink(bool allowMalformed) {
1375 _parser = null;
1376 return new _JsonUtf8DecoderSink(_reviver, _sink, allowMalformed);
1377 }
1378 }
1379
1380 class _Utf8StringBuffer {
1381 static const int INITIAL_CAPACITY = 32;
1382 // Partial state encoding.
1383 static const int MASK_TWO_BIT = 0x03;
1384 static const int MASK_SIZE = MASK_TWO_BIT;
1385 static const int SHIFT_MISSING = 2;
1386 static const int SHIFT_VALUE = 4;
1387 static const int NO_PARTIAL = 0;
1388
1389 // UTF-8 encoding and limits.
1390 static const int MAX_ASCII = 127;
1391 static const int MAX_TWO_BYTE = 0x7ff;
1392 static const int MAX_THREE_BYTE = 0xffff;
1393 static const int MAX_UNICODE = 0X10ffff;
1394 static const int MASK_TWO_BYTE = 0x1f;
1395 static const int MASK_THREE_BYTE = 0x0f;
1396 static const int MASK_FOUR_BYTE = 0x07;
1397 static const int MASK_CONTINUE_TAG = 0xC0;
1398 static const int MASK_CONTINUE_VALUE = 0x3f;
1399 static const int CONTINUE_TAG = 0x80;
1400
1401 // UTF-16 surrogate encoding.
1402 static const int LEAD_SURROGATE = 0xD800;
1403 static const int TAIL_SURROGATE = 0xDC00;
1404 static const int SHIFT_HIGH_SURROGATE = 10;
1405 static const int MASK_LOW_SURROGATE = 0x3ff;
1406
1407 // The internal buffer starts as Uint8List, but may change to Uint16List
1408 // if the string contains non-Latin-1 characters.
1409 List<int> buffer = new Uint8List(INITIAL_CAPACITY);
1410 // Number of elements in buffer.
1411 int length = 0;
1412 // Partial decoding state, for cases where a UTF-8 sequence is split
1413 // between chunks.
1414 int partialState = NO_PARTIAL;
1415 // Whether all characters so far have been Latin-1 (and the buffer is
1416 // still a Uint8List). Set to false when the first non-Latin-1 character
1417 // is encountered, and the buffer is then also converted to a Uint16List.
1418 bool isLatin1 = true;
1419 // If allowing malformed, invalid UTF-8 sequences are converted to
1420 // U+FFFD.
1421 bool allowMalformed;
1422
1423 _Utf8StringBuffer(this.allowMalformed);
1424
1425 /**
1426 * Parse the continuation of a multi-byte UTF-8 sequence.
1427 *
1428 * Parse [utf8] from [position] to [end]. If the sequence extends beyond
1429 * `end`, store the partial state in [partialState], and continue from there
1430 * on the next added slice.
1431 *
1432 * The [size] is the number of expected continuation bytes total,
1433 * and [missing] is the number of remaining continuation bytes.
1434 * The [size] is used to detect overlong encodings.
1435 * The [value] is the value collected so far.
1436 *
1437 * When called after seeing the first multi-byte marker, the [size] and
1438 * [missing] values are always the same, but they may differ if continuing
1439 * after a partial sequence.
1440 */
1441 int addContinuation(List<int> utf8, int position, int end,
1442 int size, int missing, int value) {
1443 int codeEnd = position + missing;
1444 do {
1445 if (position == end) {
1446 missing = codeEnd - position;
1447 partialState =
1448 size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE);
1449 return end;
1450 }
1451 int char = utf8[position];
1452 if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) {
1453 if (allowMalformed) {
1454 addCharCode(0xFFFD);
1455 return position;
1456 }
1457 throw new FormatException("Expected UTF-8 continuation byte, "
1458 "found $char", utf8, position);
1459 }
1460 value = 64 * value + (char & MASK_CONTINUE_VALUE);
1461 position++;
1462 } while (position < codeEnd);
1463 if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) {
1464 // Over-long encoding.
1465 if (allowMalformed) {
1466 value = 0xFFFD;
1467 } else {
1468 throw new FormatException(
1469 "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}"
1470 " encoded in ${size + 1} bytes.", utf8, position - 1);
1471 }
1472 }
1473 addCharCode(value);
1474 return position;
1475 }
1476
1477 void addCharCode(int char) {
1478 assert(char >= 0);
1479 assert(char <= MAX_UNICODE);
1480 if (partialState != NO_PARTIAL) {
1481 if (allowMalformed) {
1482 partialState = NO_PARTIAL;
1483 addCharCode(0xFFFD);
1484 } else {
1485 throw new FormatException("Incomplete UTF-8 sequence", utf8);
1486 }
1487 }
1488 if (isLatin1 && char > 0xff) {
1489 _to16Bit(); // Also grows a little if close to full.
1490 }
1491 int length = this.length;
1492 if (char <= MAX_THREE_BYTE) {
1493 if (length == buffer.length) _grow();
1494 buffer[length] = char;
1495 this.length = length + 1;
1496 return;
1497 }
1498 if (length + 2 > buffer.length) _grow();
1499 int bits = char - 0x10000;
1500 buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE);
1501 buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE);
1502 this.length = length + 2;
1503 }
1504
1505 void _to16Bit() {
1506 assert(isLatin1);
1507 Uint16List newBuffer;
1508 if ((length + INITIAL_CAPACITY) * 2 <= buffer.length) {
1509 // Reuse existing buffer if it's big enough.
1510 newBuffer = new Uint16List.view(buffer.buffer);
1511 } else {
1512 int newCapacity = buffer.length;
1513 if (newCapacity - length < INITIAL_CAPACITY) {
1514 newCapacity = length + INITIAL_CAPACITY;
1515 }
1516 newBuffer = new Uint16List(newCapacity);
1517 }
1518 newBuffer.setRange(0, length, buffer);
1519 buffer = newBuffer;
1520 isLatin1 = false;
1521 }
1522
1523 void _grow() {
1524 int newCapacity = buffer.length * 2;
1525 List newBuffer;
1526 if (isLatin1) {
1527 newBuffer = new Uint8List(newCapacity);
1528 } else {
1529 newBuffer = new Uint16List(newCapacity);
1530 }
1531 newBuffer.setRange(0, length, buffer);
1532 buffer = newBuffer;
1533 }
1534
1535 void addSlice(List<int> utf8, int position, int end) {
1536 assert(position < end);
1537 if (partialState > 0) {
1538 int continueByteCount = (partialState & MASK_TWO_BIT);
1539 int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
1540 int value = partialState >> SHIFT_VALUE;
1541 partialState = NO_PARTIAL;
1542 position = addContinuation(utf8, position, end,
1543 continueByteCount, missing, value);
1544 if (position == end) return;
1545 }
1546 // Keep index and capacity in local variables while looping over
1547 // ASCII characters.
1548 int index = length;
1549 int capacity = buffer.length;
1550 while (position < end) {
1551 int char = utf8[position];
1552 if (char <= MAX_ASCII) {
1553 if (index == capacity) {
1554 length = index;
1555 _grow();
1556 capacity = buffer.length;
1557 }
1558 buffer[index++] = char;
1559 position++;
1560 continue;
1561 }
1562 length = index;
1563 if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) {
1564 if (allowMalformed) {
1565 addCharCode(0xFFFD);
1566 position++;
1567 } else {
1568 throw new FormatException("Unexepected UTF-8 continuation byte",
1569 utf8, position);
1570 }
1571 } else if (char < 0xE0) { // C0-DF
1572 // Two-byte.
1573 position = addContinuation(utf8, position + 1, end, 1, 1,
1574 char & MASK_TWO_BYTE);
1575 } else if (char < 0xF0) { // E0-EF
1576 // Three-byte.
1577 position = addContinuation(utf8, position + 1, end, 2, 2,
1578 char & MASK_THREE_BYTE);
1579 } else if (char < 0xF8) { // F0-F7
1580 // Four-byte.
1581 position = addContinuation(utf8, position + 1, end, 3, 3,
1582 char & MASK_FOUR_BYTE);
1583 } else {
1584 if (allowMalformed) {
1585 addCharCode(0xFFFD);
1586 position++;
1587 } else {
1588 throw new FormatException("Invalid UTF-8 byte: $char",
1589 utf8, position);
1590 }
1591 }
1592 index = length;
1593 capacity = buffer.length;
1594 }
1595 length = index;
1596 }
1597
1598 String toString() {
1599 if (partialState != NO_PARTIAL) {
1600 if (allowMalformed) {
1601 partialState = NO_PARTIAL;
1602 addCharCode(0xFFFD);
1603 } else {
1604 int continueByteCount = (partialState & MASK_TWO_BIT);
1605 int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
1606 int value = partialState >> SHIFT_VALUE;
1607 int seenByteCount = continueByteCount - missing + 1;
1608 List source = new Uint8List(seenByteCount);
1609 while (seenByteCount > 1) {
1610 seenByteCount--;
1611 source[seenByteCount] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE);
1612 value >>= 6;
1613 }
1614 source[0] = value | (0x3c0 >> (continueByteCount - 1));
1615 throw new FormatException("Incomplete UTF-8 sequence",
1616 source, source.length);
1617 }
1618 }
1619 return new String.fromCharCodes(buffer, 0, length);
1620 }
1621 }
1622
1623 /**
1624 * Chunked JSON parser that parses UTF-8 chunks.
1625 */
1626 class _JsonUtf8Parser extends _ChunkedJsonParser {
1627 final bool allowMalformed;
1628 List<int> chunk;
1629 int chunkEnd;
1630
1631 _JsonUtf8Parser(_JsonListener listener, this.allowMalformed)
1632 : super(listener);
1633
1634 int getChar(int position) => chunk[position];
1635
1636 String getString(int start, int end) {
1637 beginString();
1638 addSliceToString(start, end);
1639 String result = endString();
1640 return result;
1641 }
1642
1643 void beginString() {
1644 this.buffer = new _Utf8StringBuffer(allowMalformed);
1645 }
1646
1647 void addSliceToString(int start, int end) {
1648 _Utf8StringBuffer buffer = this.buffer;
1649 buffer.addSlice(chunk, start, end);
1650 }
1651
1652 void addCharToString(int charCode) {
1653 _Utf8StringBuffer buffer = this.buffer;
1654 buffer.addCharCode(charCode);
1655 }
1656
1657 String endString() {
1658 _Utf8StringBuffer buffer = this.buffer;
1659 this.buffer = null;
1660 return buffer.toString();
1661 }
1662
1663 void copyCharsToList(int start, int end, List target, int offset) {
1664 int length = end - start;
1665 target.setRange(offset, offset + length, chunk, start);
1666 }
1667
1668 double parseDouble(int start, int end) {
1669 String string = getString(start, end);
1670 return _parseDouble(string, 0, string.length);
1671 }
1672 }
1673
1674 double _parseDouble(String source, int start, int end)
1675 native "Double_parse";
1676
1677 /**
1678 * Implements the chunked conversion from a UTF-8 encoding of JSON
1679 * to its corresponding object.
1680 */
1681 class _JsonUtf8DecoderSink extends ByteConversionSinkBase {
1682 _ChunkedUtf8Parser _parser;
1683 final Sink<Object> _sink;
1684
1685 _JsonUtf8DecoderSink(reviver, this._sink, bool allowMalformed)
1686 : _parser = _createParser(reviver, allowMalformed);
1687
1688 static _ChunkedJsonParser _createParser(reviver, bool allowMalformed) {
1689 _BuildJsonListener listener;
1690 if (reviver == null) {
1691 listener = new _BuildJsonListener();
1692 } else {
1693 listener = new _ReviverJsonListener(reviver);
1694 }
1695 return new _JsonUtf8Parser(listener, allowMalformed);
1696 }
1697
1698 void addSlice(List<int> chunk, int start, int end, bool isLast) {
1699 _addChunk(chunk, start, end);
1700 if (isLast) close();
1701 }
1702
1703 void add(List<int> chunk) {
1704 _addChunk(chunk, 0, chunk.length);
1705 }
1706
1707 void _addChunk(List<int> chunk, int start, int end) {
1708 _parser.chunk = chunk;
1709 _parser.chunkEnd = end;
1710 _parser.parse(start);
1711 }
1712
1713 void close() {
1714 _parser.close();
1715 var decoded = _parser.result;
1716 _sink.add(decoded);
1717 _sink.close();
1718 }
1719 }
OLDNEW
« no previous file with comments | « no previous file | runtime/lib/double_patch.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698