Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(205)

Side by Side Diff: runtime/lib/convert_patch.dart

Issue 649113005: Make JSON parsing work as a chunked conversion sink. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Also add an UTF-8 base JSON parser, without intermediate string representations. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | runtime/lib/double_patch.dart » ('j') | tests/lib/convert/json_chunk_test.dart » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 import "dart:_internal" show POWERS_OF_TEN;
6
5 // JSON conversion. 7 // JSON conversion.
6 8
7 patch _parseJson(String json, reviver(var key, var value)) { 9 patch _parseJson(String json, reviver(var key, var value)) {
8 _BuildJsonListener listener; 10 _BuildJsonListener listener;
9 if (reviver == null) { 11 if (reviver == null) {
10 listener = new _BuildJsonListener(); 12 listener = new _BuildJsonListener();
11 } else { 13 } else {
12 listener = new _ReviverJsonListener(reviver); 14 listener = new _ReviverJsonListener(reviver);
13 } 15 }
14 new _JsonParser(json, listener).parse(); 16 var parser = new _JsonStringParser(listener);
17 parser.chunk = json;
18 parser.chunkEnd = json.length;
19 parser.parse(0);
20 parser.close();
15 return listener.result; 21 return listener.result;
16 } 22 }
17 23
18 //// Implementation /////////////////////////////////////////////////////////// 24 //// Implementation ///////////////////////////////////////////////////////////
19 25
20 // Simple API for JSON parsing. 26 // Simple API for JSON parsing.
21 27
28 /**
29 * Listener for parsing events from [_ChunkedJsonParser].
30 */
22 abstract class _JsonListener { 31 abstract class _JsonListener {
23 void handleString(String value) {} 32 void handleString(String value) {}
24 void handleNumber(num value) {} 33 void handleNumber(num value) {}
25 void handleBool(bool value) {} 34 void handleBool(bool value) {}
26 void handleNull() {} 35 void handleNull() {}
27 void beginObject() {} 36 void beginObject() {}
28 void propertyName() {} 37 void propertyName() {}
29 void propertyValue() {} 38 void propertyValue() {}
30 void endObject() {} 39 void endObject() {}
31 void beginArray() {} 40 void beginArray() {}
32 void arrayElement() {} 41 void arrayElement() {}
33 void endArray() {} 42 void endArray() {}
34 } 43 }
35 44
36 /** 45 /**
37 * A [JsonListener] that builds data objects from the parser events. 46 * A [_JsonListener] that builds data objects from the parser events.
38 * 47 *
39 * This is a simple stack-based object builder. It keeps the most recently 48 * This is a simple stack-based object builder. It keeps the most recently
40 * seen value in a variable, and uses it depending on the following event. 49 * seen value in a variable, and uses it depending on the following event.
41 */ 50 */
42 class _BuildJsonListener extends _JsonListener { 51 class _BuildJsonListener extends _JsonListener {
43 /** 52 /**
44 * Stack used to handle nested containers. 53 * Stack used to handle nested containers.
45 * 54 *
46 * The current container is pushed on the stack when a new one is 55 * The current container is pushed on the stack when a new one is
47 * started. If the container is a [Map], there is also a current [key] 56 * started. If the container is a [Map], there is also a current [key]
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
128 void propertyValue() { 137 void propertyValue() {
129 value = reviver(key, value); 138 value = reviver(key, value);
130 super.propertyValue(); 139 super.propertyValue();
131 } 140 }
132 141
133 get result { 142 get result {
134 return reviver(null, value); 143 return reviver(null, value);
135 } 144 }
136 } 145 }
137 146
138 class _JsonParser { 147 /**
148 * Buffer holding parts of a numeral.
149 *
150 * The buffer contains the characters of a JSON number.
151 * These are all ASCII, so a [Uint8List] is used as backing store.
152 *
153 * This buffer is used when a JSON number is split between separate chunks.
154 *
155 */
156 class _NumberBuffer {
157 static const int kMinCapacity = 16;
floitsch 2014/10/20 08:52:44 constants in Dart don't start with "k". maybe they
Lasse Reichstein Nielsen 2014/10/27 12:42:32 I know. The style guide changed so they are no lon
158 static const int kDefaultOverhead = 5;
159 Uint8List list;
160 int length = 0;
161 _NumberBuffer(int initialCapacity)
162 : list = new Uint8List(_initialCapacity(initialCapacity));
163
164 int get capacity => list.length;
165
166 // Pick an initial capacity greater than the first part's size.
167 // The typical use case has two parts, this is the attempt at
168 // guessing the size of the second part without overdoing it.
169 // The default estimate of the second part is [kDefaultOverhead],
170 // then round to multiple of four, and return the result,
171 // or [kMinCapacity] if that is greater.
172 static int _initialCapacity(int minCapacity) {
173 minCapacity += kDefaultOverhead;
174 if (minCapacity < kMinCapacity) return kMinCapacity;
175 minCapacity = (minCapacity + 3) & ~3; // Round to multile of four.
floitsch 2014/10/20 08:52:44 multiple
Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.
176 return minCapacity;
177 }
178
179 // Grows to the exact size asked for.
180 void ensureCapacity(int newCapcity) {
floitsch 2014/10/20 08:52:44 newCapacity
Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.
181 Uint8List list = this.list;
182 if (newCapcity <= list.length) return;
183 Uint8List newList = new Uint8List(newCapcity);
184 newList.setRange(0, list.length, list, 0);
185 this.list = newList;
186 }
187
188 String toString() => "NumberBuffer";
floitsch 2014/10/20 08:52:44 maybe add the contents? => "NumberBuffer(${getStr
Lasse Reichstein Nielsen 2014/10/27 12:42:32 I think I had that for debugging, but I'll just re
189
190 String getString() {
191 var list = this.list;
192 if (length < list.length) {
193 list = new Uint8List.view(list.buffer, 0, length);
194 }
195 String result = new String.fromCharCodes(list);
196 return result;
197 }
198
199 // TODO(lrn): See if parsing of numbers can be abstracted to something
200 // not only working on strings, but also on char-code lists, without losing
201 // performance.
202 int parseInt() => int.parse(getString());
203 double parseDouble() => double.parse(getString());
204 }
205
206 /**
207 * Chunked JSON parser.
208 *
209 * Receives inputs in chunks, gives access to individual parts of the input,
210 * and stores input state between chunks.
211 *
212 * Implementations include [String] and UTF-8 parsers.
213 */
214 abstract class _ChunkedJsonParser {
139 // A simple non-recursive state-based parser for JSON. 215 // A simple non-recursive state-based parser for JSON.
140 // 216 //
141 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON 217 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON
142 // and strings also in OBJECT_EMPTY, OBJECT_COMMA. 218 // and strings also in OBJECT_EMPTY, OBJECT_COMMA.
143 // VALUE STRING : , } ] Transitions to 219 // VALUE STRING : , } ] Transitions to
144 // EMPTY X X -> END 220 // EMPTY X X -> END
145 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop 221 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop
146 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop 222 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop
147 // ARRAY_COMMA X X -> ARRAY_VALUE 223 // ARRAY_COMMA X X -> ARRAY_VALUE
148 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop 224 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop
(...skipping 16 matching lines...) Expand all
165 static const int INSIDE_OBJECT = 2; 241 static const int INSIDE_OBJECT = 2;
166 static const int AFTER_COLON = 3; // Always inside object. 242 static const int AFTER_COLON = 3; // Always inside object.
167 243
168 static const int ALLOW_STRING_MASK = 8; // Allowed if zero. 244 static const int ALLOW_STRING_MASK = 8; // Allowed if zero.
169 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero. 245 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero.
170 static const int ALLOW_VALUE = 0; 246 static const int ALLOW_VALUE = 0;
171 static const int STRING_ONLY = 4; 247 static const int STRING_ONLY = 4;
172 static const int NO_VALUES = 12; 248 static const int NO_VALUES = 12;
173 249
174 // Objects and arrays are "empty" until their first property/element. 250 // Objects and arrays are "empty" until their first property/element.
251 // At this position, they may either have an entry or a close-bracket.
175 static const int EMPTY = 0; 252 static const int EMPTY = 0;
176 static const int NON_EMPTY = 16; 253 static const int NON_EMPTY = 16;
177 static const int EMPTY_MASK = 16; // Empty if zero. 254 static const int EMPTY_MASK = 16; // Empty if zero.
178 255
179
180 static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY; 256 static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY;
181 257
182 // Actual states. 258 // Actual states.
183 static const int STATE_INITIAL = EMPTY | ALLOW_VALUE; 259 static const int STATE_INITIAL = EMPTY | ALLOW_VALUE;
184 static const int STATE_END = NON_EMPTY | NO_VALUES; 260 static const int STATE_END = NON_EMPTY | NO_VALUES;
185 261
186 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY | EMPTY | ALLOW_VALUE; 262 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY | EMPTY | ALLOW_VALUE;
187 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY | NON_EMPTY | NO_VALUES; 263 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY | NON_EMPTY | NO_VALUES;
188 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY | NON_EMPTY | ALLOW_VALUE; 264 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY | NON_EMPTY | ALLOW_VALUE;
189 265
(...skipping 29 matching lines...) Expand all
219 static const int CHAR_f = 0x66; 295 static const int CHAR_f = 0x66;
220 static const int CHAR_l = 0x6c; 296 static const int CHAR_l = 0x6c;
221 static const int CHAR_n = 0x6e; 297 static const int CHAR_n = 0x6e;
222 static const int CHAR_r = 0x72; 298 static const int CHAR_r = 0x72;
223 static const int CHAR_s = 0x73; 299 static const int CHAR_s = 0x73;
224 static const int CHAR_t = 0x74; 300 static const int CHAR_t = 0x74;
225 static const int CHAR_u = 0x75; 301 static const int CHAR_u = 0x75;
226 static const int LBRACE = 0x7b; 302 static const int LBRACE = 0x7b;
227 static const int RBRACE = 0x7d; 303 static const int RBRACE = 0x7d;
228 304
229 final String source; 305 // State of partial value at chunk split.
306 static const int NO_PARTIAL = 0;
307 static const int PARTIAL_STRING = 1;
308 static const int PARTIAL_NUMERAL = 2;
309 static const int PARTIAL_KEYWORD = 3;
310 static const int MASK_PARTIAL = 3;
311
312 // Partial states for numerals. Values can be |'ed with PARTIAL_NUMERAL.
313 static const int NUM_SIGN = 0; // After initial '-'.
314 static const int NUM_ZERO = 4; // After '0' as first digit.
315 static const int NUM_DIGIT = 8; // After digit, no '.' or 'e' seen.
316 static const int NUM_DOT = 12; // After '.'.
317 static const int NUM_DOT_DIGIT = 16; // After a decimal digit (after '.').
318 static const int NUM_E = 20; // After 'e' or 'E'.
319 static const int NUM_E_SIGN = 24; // After '-' or '+' after 'e' or 'E'.
320 static const int NUM_E_DIGIT = 28; // After exponent digit.
321 static const int NUM_SUCCESS = 32; // Never stored as partial state.
322
323 // Partial states for strings.
324 static const int STR_PLAIN = 0; // Inside string, but not escape.
325 static const int STR_ESCAPE = 4; // After '\'.
326 static const int STR_U = 16; // After '\u' and 0-3 hex digits.
327 static const int STR_U_COUNT_SHIFT = 2; // Hex digit count in bits 2-3.
328 static const int STR_U_VALUE_SHIFT = 5; // Hex digit value in bits 5+.
329
330 // Partial states for keywords.
331 static const int KWD_TYPE_MASK = 12;
332 static const int KWD_TYPE_SHIFT = 2;
333 static const int KWD_NULL = 0; // Prefix of "null" seen.
334 static const int KWD_TRUE = 4; // Prefix of "true" seen.
335 static const int KWD_FALSE = 8; // Prefix of "false" seen.
336 static const int KWD_COUNT_SHIFT = 4; // Prefix length in bits 4+.
337
338 // Mask used to mask off two lower bits.
339 static const int TWO_BIT_MASK = 3;
340
230 final _JsonListener listener; 341 final _JsonListener listener;
231 _JsonParser(this.source, this.listener); 342
343 // The current parsing state.
344 int state = STATE_INITIAL;
345 List<int> states = <int>[];
346
347 /**
348 * Stores tokenizer state between chunks.
349 *
350 * This state is stored when a chunk stops in the middle of a
351 * token (string, numeral, boolean or null).
352 *
353 * The partial state is used to continue parsing on the next chunk.
354 * The previous chunk is not retained, any data needed are stored in
355 * this integer, or in the [buffer] field as a string-building buffer
356 * or a [_NumberBuffer].
357 *
358 * Prefix state stored in [partialState] as bits.
359 *
360 * ..00 : No partial value (NO_PARTIAL).
361 *
362 * ..00001 : Partial string, not inside escape.
363 * ..00101 : Partial string, after '\'.
364 * ..vvvv1dd01 : Partial \u escape.
365 * The 'dd' bits (2-3) encode the number of hex digits seen.
366 * Bits 5-16 encode the value of the hex digits seen so far.
367 *
368 * ..0ddd10 : Partial numeral.
369 * The `ddd` bits store the parts of the numeral seen so
370 * far, as the constants `NUM_*` defined above.
371 * The characters of the numeral are stored in [buffer]
372 * as a [_NumberBuffer].
373 *
374 * ..0ddd0011 : Partial 'null' keyword.
375 * ..0ddd0111 : Partial 'true' keyword.
376 * ..0ddd1011 : Partial 'false' keyword.
377 * For all three keywords, the `ddd` bits encode the number
378 * of letters seen.
379 */
380 int partialState = NO_PARTIAL;
381
382 /**
383 * Extra data stored while parsing a primitive value.
384 * May be set during parsing, always set at chunk end if a value is partial.
385 *
386 * May contain a string buffer while parsing strings.
387 */
388 var buffer = null;
389
390 _ChunkedJsonParser(this.listener);
391
392 /**
393 * Push the current parse [state] on a stack.
394 *
395 * State is pushed when a new array or object literal starts,
396 * so the parser can go back to the correct value when the literal ends.
397 */
398 void saveState(int state) {
399 states.add(state);
400 }
401
402 /**
403 * Restore a state pushed with [saveState].
404 */
405 int restoreState() {
406 return states.removeLast(); // Throws if empty.
407 }
408
409 /**
410 * Finalizes the parsing.
411 *
412 * If the source ends in a number, it will be completed. Any other partial
413 * state is an error.
Søren Gjesse 2014/10/24 11:12:24 And the states stack is empty, right?
Lasse Reichstein Nielsen 2014/10/27 12:42:33 That's what the next paragraph tries to say. I'll
414 *
415 * Throws if the source read so far doesn't end up with a complete
416 * parsed value.
417 */
418 void close() {
419 if (partialState != NO_PARTIAL) {
420 int partialType = partialState & MASK_PARTIAL;
421 if (partialType == PARTIAL_NUMERAL) {
422 int numState = partialState & ~MASK_PARTIAL;
423 // A partial number might be a valid number if we know it's done.
424 // There is an unnecessary overhead if input is a single number,
425 // but this is assumed to be rare.
426 _NumberBuffer buffer = this.buffer;
427 this.buffer = null;
428 finishChunkNumber(numState, 0, 0, buffer);
429 } else if (partialType == PARTIAL_STRING) {
430 fail(chunkEnd, "Unterminate string");
Søren Gjesse 2014/10/24 11:12:24 Unterminated
Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.
431 } else {
432 assert(partialType == PARTIAL_KEYWORD);
433 fail(chunkEnd); // Incomplete literal.
434 }
435 }
436 if (state != STATE_END) {
437 fail(chunkEnd);
438 }
439 }
440
441 /**
442 * Read out the result after successfully closing the parser.
443 *
444 * The parser is closed by calling [close] or calling [addSourceChunk] with
445 * `true` as second (`isLast`) argument.
446 */
447 Object get result {
448 return listener.result;
449 }
450
451 // Sets the current source chunk.
floitsch 2014/10/20 08:52:44 Make all these comments dartdocs.
Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.
452 void set chunk(var source);
453
454 // Length of current chunk.
455 int get chunkEnd;
456
457 // Returns the chunk itself. Used by fail to include it in FormatException.
Søren Gjesse 2014/10/24 11:12:23 So the FormatException only have the chunk as the
Lasse Reichstein Nielsen 2014/10/27 12:42:32 Yes, that's all we have. The FormatException will
458 get chunk;
459
460 // Get character/code unit of current chunk.
461 int getChar(int index);
462
463 // Copy ASCII characters from start to end of chunk into a list.
464 // Used for number buffer (always copies ASCII, so encoding is not important).
465 void copyCharsToList(int start, int end, List<int> target);
466
467 // Build a string using input code units. Creates a string buffer
468 // and enables adding characters and slices to that buffer.
469 // The buffer is stored in [buffer]. If the string is unterminated,
470 // the same buffer is used to continue parsing in the next chunk.
471 void beginString();
472 // Add single character code to string being built.
473 void addCharToString(int charCode);
474 // Adds slice of current chunk to string being built.
floitsch 2014/10/20 08:52:44 end exclusive?
Lasse Reichstein Nielsen 2014/10/27 12:42:33 Acknowledged.
475 void addSliceToString(int start, int end);
476 // Finalizes the string being built and returns it as a String.
477 String endString();
478
479 // Extracts a literal string from a source slice.
Søren Gjesse 2014/10/24 11:12:23 source slice -> chunk slice
Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.
480 // No interpretation of the content is performed, except for converting
481 // the source format to string.
482 // This can be implemented more or less efficiently depending on the
483 // underlying source.
484 String getString(int start, int end);
485
486 // Parse a slice of input as an integer.
Søren Gjesse 2014/10/24 11:12:24 slice of input -> chunk slice
Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.
487 // The format is expected to be correct.
488 int parseInt(int start, int end) {
489 return int.parse(getString(start, end));
490 }
491
492 // Parse a slice of input as a double.
Søren Gjesse 2014/10/24 11:12:24 ditto.
Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.
493 // The format is expected to be correct.
494 double parseDouble(int start, int end) {
495 return double.parse(getString(start, end));
496 }
497
498 // Create a _NumberBuffer containing the digits from [start] to [chunkEnd].
499 void createNumberBuffer(int start) {
500 assert(start >= 0);
501 assert(start < chunkEnd);
502 int length = chunkEnd - start;
503 var buffer = new _NumberBuffer(length);
504 copyCharsToList(start, chunkEnd, buffer.list);
505 buffer.length = length;
506 return buffer;
507 }
508
509 /**
510 * Continues parsing a partial value.
511 */
512 int parsePartial(int position) {
513 if (position == chunkEnd) return position;
514 int partialState = this.partialState;
515 assert(partialState != NO_PARTIAL);
516 int partialType = partialState & MASK_PARTIAL;
517 this.partialState = NO_PARTIAL;
518 partialState = partialState & ~MASK_PARTIAL;
519 assert(partialType != 0);
520 if (partialType == PARTIAL_STRING) {
521 position = parsePartialString(position, partialState);
522 } else if (partialType == PARTIAL_NUMERAL) {
523 position = parsePartialNumber(position, partialState);
524 } else if (partialType == PARTIAL_KEYWORD) {
525 position = parsePartialKeyword(position, partialState);
526 }
527 return position;
528 }
529
530 // Parses the remainder of a number into the number buffer,
531 // checking syntax as it goes.
532 // Starts at chunk index 0, and returns the index of the first
Søren Gjesse 2014/10/24 11:12:24 chunk index 0 -> current chunk index?
Lasse Reichstein Nielsen 2014/10/27 12:42:33 At [position] actually.
533 // non-digit character found, or chunkEnd if the entire chunk is
534 // used.
535 // Throws if a syntax error is detected.
536 int parsePartialNumber(int position, int state) {
537 int start = position;
538 // Primitive implementation, can be optimized.
539 _NumberBuffer buffer = this.buffer;
540 this.buffer = null;
541 int end = chunkEnd;
542 toBailout: {
543 if (position == end) break toBailout;
544 int char = getChar(position);
545 int digit = char ^ CHAR_0;
546 if (state == NUM_SIGN) {
547 if (digit <= 9) {
548 if (digit == 0) {
549 state = NUM_ZERO;
550 } else {
551 state = NUM_DIGIT;
552 }
553 position++;
554 if (position == end) break toBailout;
555 char = getChar(position);
556 digit = char ^ CHAR_0;
557 } else {
558 return fail(position);
559 }
560 }
561 if (state == NUM_ZERO) {
562 if (digit <= 9) return fail(position);
floitsch 2014/10/20 08:52:44 Add comment, why this is not allowed.
Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.
563 state = NUM_DIGIT;
564 }
565 while (state == NUM_DIGIT) {
566 if (digit > 9) {
567 if (char == DECIMALPOINT) {
568 state = NUM_DOT;
569 } else if ((char | 0x20) == CHAR_e) {
570 state = NUM_E;
571 } else {
572 finishChunkNumber(state, start, position, buffer);
573 return position;
574 }
575 }
576 position++;
577 if (position == end) break toBailout;
578 char = getChar(position);
579 digit = char ^ CHAR_0;
580 }
581 if (state == NUM_DOT) {
582 if (digit > 9) return fail(position);
583 state = NUM_DOT_DIGIT;
584 }
585 while (state == NUM_DOT_DIGIT) {
586 if (digit > 9) {
587 if ((char | 0x20) == CHAR_e) {
588 state = NUM_E;
589 } else {
590 finishChunkNumber(state, start, position, buffer);
591 return position;
592 }
593 }
594 position++;
595 if (position == end) break toBailout;
596 char = getChar(position);
597 digit = char ^ CHAR_0;
598 }
599 if (state == NUM_E) {
600 if (char == PLUS || char == MINUS) {
601 state = NUM_E_SIGN;
602 position++;
603 if (position == end) break toBailout;
604 char = getChar(position);
605 digit = char ^ CHAR_0;
606 }
607 }
608 assert(state >= NUM_E);
609 while (digit <= 9) {
610 state = NUM_E_DIGIT;
611 position++;
612 if (position == end) break toBailout;
613 char = getChar(position);
614 digit = char ^ CHAR_0;
615 }
616 finishChunkNumber(state, start, position, buffer);
617 return position;
618 }
619 // Bailout code in case the current chunk ends while parsing the numeral.
620 assert(position == end);
621 continueChunkNumber(state, start, buffer);
622 return chunkEnd;
623 }
624
625 int parsePartialString(int position, int partialState) {
626 if (partialState == STR_PLAIN) {
627 return parseStringToBuffer(position);
628 }
629 if (partialState == STR_ESCAPE) {
630 position = parseStringEscape(position);
631 // parseStringEscape sets partialState if it sees the end.
632 if (position == chunkEnd) return position;
633 return parseStringToBuffer(position);
634 }
635 assert((partialState & STR_U) != 0);
636 int value = partialState >> STR_U_VALUE_SHIFT;
637 int count = (partialState >> STR_U_COUNT_SHIFT) & TWO_BIT_MASK;
638 for (int i = count; i < 4; i++, position++) {
639 if (position == chunkEnd) return chunkStringEscapeU(i, value);
640 int char = getChar(position);
641 int digit = parseHexDigit(char);
642 if (digit < 0) fail(position, "Invalid hex digit");
643 value = 16 * value + digit;
644 }
645 addCharToString(value);
646 return parseStringToBuffer(position);
647 }
648
649 int parsePartialKeyword(int position, int partialState) {
650 int keywordType = partialState & KWD_TYPE_MASK;
651 int count = partialState >> KWD_COUNT_SHIFT;
652 int keywordTypeIndex = keywordType >> KWD_TYPE_SHIFT;
653 String keyword = const ["null", "true", "false"][keywordTypeIndex];
654 assert(count < keyword.length);
655 do {
656 if (position == chunkEnd) {
657 this.partialState =
658 PARTIAL_KEYWORD | keywordType | (count << KWD_COUNT_SHIFT);
659 return chunkEnd;
660 }
661 int expectedChar = keyword.codeUnitAt(count);
662 if (getChar(position) != expectedChar) return fail(position);
663 position++;
664 count++;
665 } while (count < keyword.length);
666 if (keywordType == KWD_NULL) {
667 listener.handleNull();
668 } else {
669 listener.handleBool(keywordType == KWD_TRUE);
670 }
671 return position;
672 }
673
674 int parseHexDigit(int char) {
675 int digit = char ^ 0x30;
676 if (digit <= 9) return digit;
677 int letter = (char | 0x20) ^ 0x60;
678 // values 1 .. 6 are 'a' through 'f'
679 if (letter <= 6 && letter > 0) return letter + 9;
680 return -1;
681 }
232 682
233 /** Parses [source], or throws if it fails. */ 683 /** Parses [source], or throws if it fails. */
234 void parse() { 684 void parse(int position) {
235 final List<int> states = <int>[]; 685 int length = chunkEnd;
236 int state = STATE_INITIAL; 686 if (partialState != NO_PARTIAL) {
237 int position = 0; 687 position = parsePartial(position);
238 int length = source.length; 688 if (position == length) return;
689 }
690 int state = this.state;
239 while (position < length) { 691 while (position < length) {
240 int char = source.codeUnitAt(position); 692 int char = getChar(position);
241 switch (char) { 693 switch (char) {
242 case SPACE: 694 case SPACE:
243 case CARRIAGE_RETURN: 695 case CARRIAGE_RETURN:
244 case NEWLINE: 696 case NEWLINE:
245 case TAB: 697 case TAB:
246 position++; 698 position++;
247 break; 699 break;
248 case QUOTE: 700 case QUOTE:
249 if ((state & ALLOW_STRING_MASK) != 0) fail(position); 701 if ((state & ALLOW_STRING_MASK) != 0) return fail(position);
702 state |= VALUE_READ_BITS;
250 position = parseString(position + 1); 703 position = parseString(position + 1);
251 state |= VALUE_READ_BITS;
252 break; 704 break;
253 case LBRACKET: 705 case LBRACKET:
254 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 706 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
255 listener.beginArray(); 707 listener.beginArray();
256 states.add(state); 708 saveState(state);
257 state = STATE_ARRAY_EMPTY; 709 state = STATE_ARRAY_EMPTY;
258 position++; 710 position++;
259 break; 711 break;
260 case LBRACE: 712 case LBRACE:
261 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 713 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
262 listener.beginObject(); 714 listener.beginObject();
263 states.add(state); 715 saveState(state);
264 state = STATE_OBJECT_EMPTY; 716 state = STATE_OBJECT_EMPTY;
265 position++; 717 position++;
266 break; 718 break;
267 case CHAR_n: 719 case CHAR_n:
268 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 720 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
721 state |= VALUE_READ_BITS;
269 position = parseNull(position); 722 position = parseNull(position);
270 state |= VALUE_READ_BITS;
271 break; 723 break;
272 case CHAR_f: 724 case CHAR_f:
273 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 725 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
726 state |= VALUE_READ_BITS;
274 position = parseFalse(position); 727 position = parseFalse(position);
275 state |= VALUE_READ_BITS;
276 break; 728 break;
277 case CHAR_t: 729 case CHAR_t:
278 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 730 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
731 state |= VALUE_READ_BITS;
279 position = parseTrue(position); 732 position = parseTrue(position);
280 state |= VALUE_READ_BITS;
281 break; 733 break;
282 case COLON: 734 case COLON:
283 if (state != STATE_OBJECT_KEY) fail(position); 735 if (state != STATE_OBJECT_KEY) return fail(position);
284 listener.propertyName(); 736 listener.propertyName();
285 state = STATE_OBJECT_COLON; 737 state = STATE_OBJECT_COLON;
286 position++; 738 position++;
287 break; 739 break;
288 case COMMA: 740 case COMMA:
289 if (state == STATE_OBJECT_VALUE) { 741 if (state == STATE_OBJECT_VALUE) {
290 listener.propertyValue(); 742 listener.propertyValue();
291 state = STATE_OBJECT_COMMA; 743 state = STATE_OBJECT_COMMA;
292 position++; 744 position++;
293 } else if (state == STATE_ARRAY_VALUE) { 745 } else if (state == STATE_ARRAY_VALUE) {
294 listener.arrayElement(); 746 listener.arrayElement();
295 state = STATE_ARRAY_COMMA; 747 state = STATE_ARRAY_COMMA;
296 position++; 748 position++;
297 } else { 749 } else {
298 fail(position); 750 return fail(position);
299 } 751 }
300 break; 752 break;
301 case RBRACKET: 753 case RBRACKET:
302 if (state == STATE_ARRAY_EMPTY) { 754 if (state == STATE_ARRAY_EMPTY) {
303 listener.endArray(); 755 listener.endArray();
304 } else if (state == STATE_ARRAY_VALUE) { 756 } else if (state == STATE_ARRAY_VALUE) {
305 listener.arrayElement(); 757 listener.arrayElement();
306 listener.endArray(); 758 listener.endArray();
307 } else { 759 } else {
308 fail(position); 760 return fail(position);
309 } 761 }
310 state = states.removeLast() | VALUE_READ_BITS; 762 state = restoreState() | VALUE_READ_BITS;
311 position++; 763 position++;
312 break; 764 break;
313 case RBRACE: 765 case RBRACE:
314 if (state == STATE_OBJECT_EMPTY) { 766 if (state == STATE_OBJECT_EMPTY) {
315 listener.endObject(); 767 listener.endObject();
316 } else if (state == STATE_OBJECT_VALUE) { 768 } else if (state == STATE_OBJECT_VALUE) {
317 listener.propertyValue(); 769 listener.propertyValue();
318 listener.endObject(); 770 listener.endObject();
319 } else { 771 } else {
320 fail(position); 772 return fail(position);
321 } 773 }
322 state = states.removeLast() | VALUE_READ_BITS; 774 state = restoreState() | VALUE_READ_BITS;
323 position++; 775 position++;
324 break; 776 break;
325 default: 777 default:
326 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); 778 if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
779 state |= VALUE_READ_BITS;
327 position = parseNumber(char, position); 780 position = parseNumber(char, position);
328 state |= VALUE_READ_BITS;
329 break; 781 break;
330 } 782 }
331 } 783 }
332 if (state != STATE_END) fail(position); 784 this.state = state;
333 } 785 }
334 786
335 /** 787 /**
336 * Parses a "true" literal starting at [position]. 788 * Parses a "true" literal starting at [position].
337 * 789 *
338 * [:source[position]:] must be "t". 790 * [:source[position]:] must be "t".
339 */ 791 */
340 int parseTrue(int position) { 792 int parseTrue(int position) {
341 assert(source.codeUnitAt(position) == CHAR_t); 793 assert(getChar(position) == CHAR_t);
342 if (source.length < position + 4) fail(position, "Unexpected identifier"); 794 if (chunkEnd < position + 4) {
343 if (source.codeUnitAt(position + 1) != CHAR_r || 795 return parseKeywordPrefix(position, "true", KWD_TRUE);
344 source.codeUnitAt(position + 2) != CHAR_u || 796 }
345 source.codeUnitAt(position + 3) != CHAR_e) { 797 if (getChar(position + 1) != CHAR_r ||
346 fail(position); 798 getChar(position + 2) != CHAR_u ||
799 getChar(position + 3) != CHAR_e) {
800 return fail(position);
347 } 801 }
348 listener.handleBool(true); 802 listener.handleBool(true);
349 return position + 4; 803 return position + 4;
350 } 804 }
351 805
352 /** 806 /**
353 * Parses a "false" literal starting at [position]. 807 * Parses a "false" literal starting at [position].
354 * 808 *
355 * [:source[position]:] must be "f". 809 * [:source[position]:] must be "f".
356 */ 810 */
357 int parseFalse(int position) { 811 int parseFalse(int position) {
358 assert(source.codeUnitAt(position) == CHAR_f); 812 assert(getChar(position) == CHAR_f);
359 if (source.length < position + 5) fail(position, "Unexpected identifier"); 813 if (chunkEnd < position + 5) {
360 if (source.codeUnitAt(position + 1) != CHAR_a || 814 return parseKeywordPrefix(position, "false", KWD_FALSE);
361 source.codeUnitAt(position + 2) != CHAR_l || 815 }
362 source.codeUnitAt(position + 3) != CHAR_s || 816 if (getChar(position + 1) != CHAR_a ||
363 source.codeUnitAt(position + 4) != CHAR_e) { 817 getChar(position + 2) != CHAR_l ||
364 fail(position); 818 getChar(position + 3) != CHAR_s ||
819 getChar(position + 4) != CHAR_e) {
820 return fail(position);
365 } 821 }
366 listener.handleBool(false); 822 listener.handleBool(false);
367 return position + 5; 823 return position + 5;
368 } 824 }
369 825
370 /** 826 /**
371 * Parses a "null" literal starting at [position]. 827 * Parses a "null" literal starting at [position].
372 * 828 *
373 * [:source[position]:] must be "n". 829 * [:source[position]:] must be "n".
374 */ 830 */
375 int parseNull(int position) { 831 int parseNull(int position) {
376 assert(source.codeUnitAt(position) == CHAR_n); 832 assert(getChar(position) == CHAR_n);
377 if (source.length < position + 4) fail(position, "Unexpected identifier"); 833 if (chunkEnd < position + 4) {
378 if (source.codeUnitAt(position + 1) != CHAR_u || 834 return parseKeywordPrefix(position, "null", KWD_NULL);
379 source.codeUnitAt(position + 2) != CHAR_l || 835 }
380 source.codeUnitAt(position + 3) != CHAR_l) { 836 if (getChar(position + 1) != CHAR_u ||
381 fail(position); 837 getChar(position + 2) != CHAR_l ||
838 getChar(position + 3) != CHAR_l) {
839 return fail(position);
382 } 840 }
383 listener.handleNull(); 841 listener.handleNull();
384 return position + 4; 842 return position + 4;
385 } 843 }
386 844
845 int parseKeywordPrefix(int position, String chars, int type) {
846 assert(getChar(position) == chars.codeUnitAt(0));
847 int length = chunkEnd;
848 int start = position;
849 int count = 1;
850 while (++position < length) {
851 int char = getChar(position);
852 if (char != chars.codeUnitAt(count)) return fail(start);
853 count++;
854 }
855 this.partialState = PARTIAL_KEYWORD | type | (count << KWD_COUNT_SHIFT);
856 return length;
857 }
858
387 /** 859 /**
388 * Parses a string value. 860 * Parses a string value.
389 * 861 *
390 * Initial [position] is right after the initial quote. 862 * Initial [position] is right after the initial quote.
391 * Returned position right after the final quote. 863 * Returned position right after the final quote.
392 */ 864 */
393 int parseString(int position) { 865 int parseString(int position) {
394 // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"' 866 // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"'
395 // Initial position is right after first '"'. 867 // Initial position is right after first '"'.
396 int start = position; 868 int start = position;
397 while (position < source.length) { 869 int end = chunkEnd;
398 int char = source.codeUnitAt(position++); 870 while (position < end) {
871 int char = getChar(position++);
399 // BACKSLASH is larger than QUOTE and SPACE. 872 // BACKSLASH is larger than QUOTE and SPACE.
400 if (char > BACKSLASH) { 873 if (char > BACKSLASH) {
401 continue; 874 continue;
402 } 875 }
403 if (char == BACKSLASH) { 876 if (char == BACKSLASH) {
404 return parseStringWithEscapes(start, position - 1); 877 beginString();
878 addSliceToString(start, position - 1);
879 return parseStringToBuffer(position - 1);
405 } 880 }
406 if (char == QUOTE) { 881 if (char == QUOTE) {
407 listener.handleString(source.substring(start, position - 1)); 882 listener.handleString(getString(start, position - 1));
408 return position; 883 return position;
409 } 884 }
410 if (char < SPACE) { 885 if (char < SPACE) {
411 fail(position - 1, "Control character in string"); 886 fail(position - 1, "Control character in string");
412 } 887 }
413 } 888 }
414 fail(start - 1, "Unterminated string"); 889 beginString();
890 addSliceToString(start, end);
891 return chunkString(STR_PLAIN);
415 } 892 }
416 893
417 int parseStringWithEscapes(start, position) { 894 int chunkString(int stringState) {
418 // Backslash escape detected. Collect character codes for rest of string. 895 partialState = PARTIAL_STRING | stringState;
419 int firstEscape = position; 896 return chunkEnd;
420 List<int> chars = <int>[]; 897 }
421 for (int i = start; i < firstEscape; i++) { 898
422 chars.add(source.codeUnitAt(i)); 899 int chunkStringEscapeU(int count, int value) {
900 partialState = PARTIAL_STRING | STR_U |
901 (count << STR_U_COUNT_SHIFT) |
902 (value << STR_U_VALUE_SHIFT);
903 return chunkEnd;
904 }
905
906 int parseStringToBuffer(position) {
907 int end = chunkEnd;
908 int start = position;
909 while (true) {
910 if (position == end) {
911 if (position > start) {
912 addSliceToString(start, position);
913 }
914 return chunkString(STR_PLAIN);
915 }
916 int char = getChar(position++);
917 if (char > BACKSLASH) continue;
918 if (char < SPACE) {
919 fail(position - 1); // Control character in string.
920 return;
921 }
922 if (char == QUOTE) {
923 int quotePosition = position - 1;
924 if (quotePosition > start) {
925 addSliceToString(start, quotePosition);
926 }
927 listener.handleString(endString());
928 return position;
929 }
930 if (char != BACKSLASH) {
931 continue;
932 }
933 // Handle escape.
934 if (position - 1 > start) {
935 addSliceToString(start, position - 1);
936 }
937 if (position == end) return chunkString(STR_ESCAPE);
938 position = parseStringEscape(position);
939 if (position == end) return position;
940 start = position;
423 } 941 }
424 position++; 942 return -1; // UNREACHABLE.
425 while (true) { 943 }
426 if (position == source.length) { 944
427 fail(start - 1, "Unterminated string"); 945 int parseStringEscape(int position) {
Søren Gjesse 2014/10/24 11:12:24 Add a comment that position is just after the back
Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.
428 } 946 int char = getChar(position++);
429 int char = source.codeUnitAt(position); 947 int length = chunkEnd;
430 switch (char) { 948 switch (char) {
431 case CHAR_b: char = BACKSPACE; break; 949 case CHAR_b: char = BACKSPACE; break;
432 case CHAR_f: char = FORM_FEED; break; 950 case CHAR_f: char = FORM_FEED; break;
433 case CHAR_n: char = NEWLINE; break; 951 case CHAR_n: char = NEWLINE; break;
434 case CHAR_r: char = CARRIAGE_RETURN; break; 952 case CHAR_r: char = CARRIAGE_RETURN; break;
435 case CHAR_t: char = TAB; break; 953 case CHAR_t: char = TAB; break;
436 case SLASH: 954 case SLASH:
437 case BACKSLASH: 955 case BACKSLASH:
438 case QUOTE: 956 case QUOTE:
439 break; 957 break;
440 case CHAR_u: 958 case CHAR_u:
441 int hexStart = position - 1; 959 int hexStart = position - 1;
442 int value = 0; 960 int value = 0;
443 for (int i = 0; i < 4; i++) { 961 for (int i = 0; i < 4; i++) {
444 position++; 962 if (position == length) return chunkStringEscapeU(i, value);
445 if (position == source.length) { 963 char = getChar(position++);
446 fail(start - 1, "Unterminated string"); 964 int digit = char ^ 0x30;
965 value *= 16;
966 if (digit <= 9) {
967 value += digit;
968 } else {
969 digit = (char | 0x20) - CHAR_a;
970 if (digit < 0 || digit > 5) {
971 return fail(hexStart, "Invalid unicode escape");
447 } 972 }
448 char = source.codeUnitAt(position); 973 value += digit + 10;
449 char -= 0x30;
450 if (char < 0) fail(hexStart, "Invalid unicode escape");
451 if (char < 10) {
452 value = value * 16 + char;
453 } else {
454 char = (char | 0x20) - 0x31;
455 if (char < 0 || char > 5) {
456 fail(hexStart, "Invalid unicode escape");
457 }
458 value = value * 16 + char + 10;
459 }
460 } 974 }
461 char = value;
462 break;
463 default:
464 if (char < SPACE) fail(position, "Control character in string");
465 fail(position, "Unrecognized string escape");
466 }
467 do {
468 chars.add(char);
469 position++;
470 if (position == source.length) fail(start - 1, "Unterminated string");
471 char = source.codeUnitAt(position);
472 if (char == QUOTE) {
473 String result = new String.fromCharCodes(chars);
474 listener.handleString(result);
475 return position + 1;
476 } 975 }
477 if (char < SPACE) { 976 char = value;
478 fail(position, "Control character in string"); 977 break;
479 } 978 default:
480 } while (char != BACKSLASH); 979 if (char < SPACE) return fail(position, "Control character in string");
481 position++; 980 return fail(position, "Unrecognized string escape");
482 } 981 }
982 addCharToString(char);
983 if (position == length) return chunkString(STR_PLAIN);
984 return position;
985 }
986
987 /// Sets up a partial numeral state.
988 /// Returns chunkEnd to allow easy one-line bailout tests.
989 int beginChunkNumber(int state, int start) {
990 int end = chunkEnd;
991 int length = end - start;
992 var buffer = new _NumberBuffer(length);
993 copyCharsToList(start, end, buffer.list, 0);
994 buffer.length = length;
995 this.buffer = buffer;
996 this.partialState = PARTIAL_NUMERAL | state;
997 return end;
998 }
999
1000 void addNumberChunk(_NumberBuffer buffer, int start, int end, int overhead) {
1001 int length = end - start;
1002 int count = buffer.length;
1003 int newCount = count + length;
1004 int newCapacity = newCount + overhead;
1005 buffer.ensureCapacity(newCapacity);
1006 copyCharsToList(start, end, buffer.list, count);
1007 buffer.length = newCount;
1008 }
1009
1010 // Continues an already chunked number accross an entire chunk.
1011 int continueChunkNumber(int state, int start, _NumberBuffer buffer) {
1012 int end = chunkEnd;
1013 addNumberChunk(buffer, start, end, _NumberBuffer.kDefaultOverhead);
1014 this.buffer = buffer;
1015 this.partialState = PARTIAL_NUMERAL | state;
1016 return end;
1017 }
1018
1019 int finishChunkNumber(int state, int start, int end, _NumberBuffer buffer) {
1020 if (state == NUM_ZERO) {
1021 listener.handleNumber(0);
1022 return;
1023 }
1024 if (end > start) {
1025 addNumberChunk(buffer, start, end, 0);
1026 }
1027 if (state == NUM_DIGIT) {
1028 listener.handleNumber(buffer.parseInt());
1029 } else if (state == NUM_DOT_DIGIT || state == NUM_E_DIGIT) {
1030 listener.handleNumber(buffer.parseDouble());
1031 } else {
1032 fail(chunkEnd, "Unterminated number literal");
1033 }
1034 return end;
483 } 1035 }
484 1036
485 int parseNumber(int char, int position) { 1037 int parseNumber(int char, int position) {
486 // Also called on any unexpected character. 1038 // Also called on any unexpected character.
487 // Format: 1039 // Format:
488 // '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)? 1040 // '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)?
489 int start = position; 1041 int start = position;
490 int length = source.length; 1042 int length = chunkEnd;
491 int intValue = 0; // Collect int value while parsing. 1043 int intValue = 0; // Collect int value while parsing.
492 int intSign = 1; 1044 double doubleValue = 0.0; // Collect double value while parsing.
1045 int sign = 1;
493 bool isDouble = false; 1046 bool isDouble = false;
494 // Break this block when the end of the number literal is reached. 1047 // Break this block when the end of the number literal is reached.
495 // At that time, position points to the next character, and isDouble 1048 // At that time, position points to the next character, and isDouble
496 // is set if the literal contains a decimal point or an exponential. 1049 // is set if the literal contains a decimal point or an exponential.
497 parsing: { 1050 parsing: {
498 if (char == MINUS) { 1051 if (char == MINUS) {
499 intSign = -1; 1052 sign = -1;
500 position++; 1053 position++;
501 if (position == length) fail(position, "Missing expected digit"); 1054 if (position == length) return beginChunkNumber(NUM_SIGN, start);
502 char = source.codeUnitAt(position); 1055 char = getChar(position);
503 } 1056 }
504 if (char < CHAR_0 || char > CHAR_9) { 1057 int digit = char ^ CHAR_0;
505 if (intSign < 0) { 1058 if (digit > 9) {
1059 if (sign < 0) {
506 fail(position, "Missing expected digit"); 1060 fail(position, "Missing expected digit");
507 } else { 1061 } else {
508 // If it doesn't even start out as a numeral. 1062 // If it doesn't even start out as a numeral.
509 fail(position, "Unexpected character"); 1063 fail(position, "Unexpected character");
510 } 1064 }
511 } 1065 }
512 if (char == CHAR_0) { 1066 if (digit == 0) {
513 position++; 1067 position++;
514 if (position == length) break parsing; 1068 if (position == length) return beginChunkNumber(NUM_ZERO, start);
515 char = source.codeUnitAt(position); 1069 char = getChar(position);
516 if (CHAR_0 <= char && char <= CHAR_9) { 1070 digit = char ^ CHAR_0;
517 fail(position); 1071 // If starting with zero, next character must not be digit.
518 } 1072 if (digit <= 9) fail(position);
519 } else { 1073 } else {
520 do { 1074 do {
521 intValue = intValue * 10 + (char - CHAR_0); 1075 intValue = 10 * intValue + digit;
522 position++; 1076 position++;
523 if (position == length) break parsing; 1077 if (position == length) return beginChunkNumber(NUM_DIGIT, start);
524 char = source.codeUnitAt(position); 1078 char = getChar(position);
525 } while (CHAR_0 <= char && char <= CHAR_9); 1079 digit = char ^ CHAR_0;
1080 } while (digit <= 9);
526 } 1081 }
527 if (char == DECIMALPOINT) { 1082 if (char == DECIMALPOINT) {
528 isDouble = true; 1083 isDouble = true;
1084 doubleValue = intValue.toDouble();
1085 intValue = 0;
529 position++; 1086 position++;
530 if (position == length) fail(position, "Missing expected digit"); 1087 if (position == length) return beginChunkNumber(NUM_DOT, start);
531 char = source.codeUnitAt(position); 1088 char = getChar(position);
532 if (char < CHAR_0 || char > CHAR_9) fail(position); 1089 digit = char ^ CHAR_0;
1090 if (digit > 9) fail(position);
533 do { 1091 do {
1092 doubleValue = 10.0 * doubleValue + digit;
1093 intValue -= 1;
floitsch 2014/10/20 08:52:44 Don't reuse "intValue". Afaics this has nothing to
Lasse Reichstein Nielsen 2014/10/27 12:42:32 It is collecting *an* integer value. I'll just do
534 position++; 1094 position++;
535 if (position == length) break parsing; 1095 if (position == length) return beginChunkNumber(NUM_DOT_DIGIT, start);
536 char = source.codeUnitAt(position); 1096 char = getChar(position);
537 } while (CHAR_0 <= char && char <= CHAR_9); 1097 digit = char ^ CHAR_0;
538 } 1098 } while (digit <= 9);
539 if (char == CHAR_e || char == CHAR_E) { 1099 }
540 isDouble = true; 1100 if ((char | 0x20) == CHAR_e) {
1101 if (!isDouble) {
1102 doubleValue = intValue.toDouble();
1103 intValue = 0;
floitsch 2014/10/20 08:52:44 ditto.
Lasse Reichstein Nielsen 2014/10/27 12:42:32 Same.
1104 isDouble = true;
1105 }
541 position++; 1106 position++;
542 if (position == length) fail(position, "Missing expected digit"); 1107 if (position == length) return beginChunkNumber(NUM_E, start);
543 char = source.codeUnitAt(position); 1108 char = getChar(position);
1109 int expSign = 1;
1110 int exponent = 0;
544 if (char == PLUS || char == MINUS) { 1111 if (char == PLUS || char == MINUS) {
1112 expSign = 0x2C - char; // -1 for MINUS, +1 for PLUS
545 position++; 1113 position++;
546 if (position == length) fail(position, "Missing expected digit"); 1114 if (position == length) return beginChunkNumber(NUM_E_SIGN, start);
547 char = source.codeUnitAt(position); 1115 char = getChar(position);
548 } 1116 }
549 if (char < CHAR_0 || char > CHAR_9) { 1117 digit = char ^ CHAR_0;
1118 if (digit > 9) {
550 fail(position, "Missing expected digit"); 1119 fail(position, "Missing expected digit");
551 } 1120 }
552 do { 1121 do {
1122 exponent = 10 * exponent + digit;
553 position++; 1123 position++;
554 if (position == length) break parsing; 1124 if (position == length) return beginChunkNumber(NUM_E_DIGIT, start);
555 char = source.codeUnitAt(position); 1125 char = getChar(position);
556 } while (CHAR_0 <= char && char <= CHAR_9); 1126 digit = char ^ CHAR_0;
1127 } while (digit <= 9);
1128 intValue += expSign * exponent;
557 } 1129 }
558 } 1130 }
559 if (!isDouble) { 1131 if (!isDouble) {
560 listener.handleNumber(intSign * intValue); 1132 listener.handleNumber(sign * intValue);
561 return position; 1133 return position;
562 } 1134 }
563 // This correctly creates -0.0 for doubles. 1135 const double maxExactDouble = 9007199254740992.0;
floitsch 2014/10/20 08:52:44 comment.
Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.
564 listener.handleNumber(_parseDouble(source, start, position)); 1136 if (doubleValue < maxExactDouble) {
1137 int exponent = intValue;
1138 double signedMantissa = doubleValue * sign;
1139 if (exponent >= -22) {
1140 if (exponent < 0) {
1141 listener.handleNumber(signedMantissa / POWERS_OF_TEN[-exponent]);
1142 return position;
1143 }
1144 if (exponent == 0) {
1145 listener.handleNumber(signedMantissa);
1146 return position;
1147 }
1148 if (exponent <= 22) {
1149 listener.handleNumber(signedMantissa * POWERS_OF_TEN[exponent]);
1150 return position;
1151 }
1152 }
1153 }
1154 listener.handleNumber(parseDouble(start, position));
floitsch 2014/10/20 08:52:44 comment.
Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.
565 return position; 1155 return position;
566 } 1156 }
567 1157
568 static double _parseDouble(String source, int start, int end) 1158 int fail(int position, [String message]) {
569 native "Double_parse"; 1159 if (message == null) {
570 1160 message = "Unexpected character";
571 void fail(int position, [String message]) { 1161 if (position == chunkEnd) message = "Unexpected end of input";
572 if (message == null) message = "Unexpected character"; 1162 }
573 throw new FormatException(message, source, position); 1163 throw new FormatException(message, chunk, position);
574 } 1164 }
575 } 1165 }
1166
1167 /**
1168 * Chunked JSON parser that parses [String] chunks.
1169 */
1170 class _JsonStringParser extends _ChunkedJsonParser {
1171 String chunk;
1172 int chunkEnd;
1173
1174 _JsonStringParser(_JsonListener listener) : super(listener);
1175
1176 int getChar(int position) => chunk.codeUnitAt(position);
1177
1178 String getString(int start, int end) {
1179 return chunk.substring(start, end);
1180 }
1181
1182 void beginString() {
1183 this.buffer = new StringBuffer();
1184 }
1185
1186 void addSliceToString(int start, int end) {
1187 StringBuffer buffer = this.buffer;
1188 buffer.write(chunk.substring(start, end));
1189 }
1190
1191 void addCharToString(int charCode) {
1192 StringBuffer buffer = this.buffer;
1193 buffer.writeCharCode(charCode);
1194 }
1195
1196 String endString() {
1197 StringBuffer buffer = this.buffer;
1198 this.buffer = null;
1199 return buffer.toString();
1200 }
1201
1202 void copyCharsToList(int start, int end, List target, int offset) {
1203 int length = end - start;
1204 for (int i = 0; i < length; i++) {
1205 target[offset + i] = chunk.codeUnitAt(start + i);
1206 }
1207 }
1208
1209 double parseDouble(int start, int end) {
1210 return _parseDouble(chunk, start, end);
1211 }
1212 }
1213
1214 patch class JsonDecoder {
1215 /* patch */ StringConversionSink startChunkedConversion(Sink<Object> sink) {
1216 return new _JsonStringDecoderSink(this._reviver, sink);
1217 }
1218 }
1219
1220 /**
1221 * Implements the chunked conversion from a JSON string to its corresponding
1222 * object.
1223 *
1224 * The sink only creates one object, but its input can be chunked.
1225 */
1226 class _JsonStringDecoderSink extends StringConversionSinkBase {
1227 _ChunkedJsonParser _parser;
1228 Function _reviver;
1229 final Sink<Object> _sink;
1230
1231 _JsonStringDecoderSink(reviver, this._sink)
1232 : _reviver = reviver, _parser = _createParser(reviver);
1233
1234 static _ChunkedJsonParser _createParser(reviver) {
1235 _BuildJsonListener listener;
1236 if (reviver == null) {
1237 listener = new _BuildJsonListener();
1238 } else {
1239 listener = new _ReviverJsonListener(reviver);
1240 }
1241 return new _JsonStringParser(listener);
1242 }
1243
1244 void addSlice(String chunk, int start, int end, bool isLast) {
1245 _parser.chunk = chunk;
1246 _parser.chunkEnd = end;
1247 _parser.parse(start);
1248 if (isLast) _parser.close();
1249 }
1250
1251 void add(String chunk) {
1252 addSlice(chunk, 0, chunk.length, false);
1253 }
1254
1255 void close() {
1256 _parser.close();
1257 var decoded = _parser.result;
1258 _sink.add(decoded);
1259 _sink.close();
1260 }
1261
1262 Utf8ConversionSink asUtf8Sink(bool allowMalformed) {
1263 _parser = null;
1264 return new _JsonUtf8DecoderSink(_reviver, _sink, allowMalformed);
1265 }
1266 }
1267
1268 class _Utf8StringBuffer {
1269 static const int INITIAL_CAPACITY = 32;
1270 // Partial state encoding.
1271 static const int MASK_TWO_BIT = 0x03;
1272 static const int MASK_SIZE = MASK_TWO_BIT;
1273 static const int SHIFT_MISSING = 2;
1274 static const int SHIFT_VALUE = 4;
1275 static const int NO_PARTIAL = 0;
1276
1277 // UTF-8 encoding and limits.
1278 static const int MAX_ASCII = 127;
1279 static const int MAX_TWO_BYTE = 0x7ff;
1280 static const int MAX_THREE_BYTE = 0xffff;
1281 static const int MAX_UNICODE = 0X10ffff;
1282 static const int MASK_TWO_BYTE = 0x1f;
1283 static const int MASK_THREE_BYTE = 0x0f;
1284 static const int MASK_FOUR_BYTE = 0x07;
1285 static const int MASK_CONTINUE_TAG = 0xC0;
1286 static const int MASK_CONTINUE_VALUE = 0x3f;
1287 static const int CONTINUE_TAG = 0x80;
1288
1289 // UTF-16 surrogate encoding.
1290 static const int LEAD_SURROGATE = 0xD800;
1291 static const int TAIL_SURROGATE = 0xDC00;
1292 static const int SHIFT_HIGH_SURROGATE = 10;
1293 static const int MASK_LOW_SURROGATE = 0x3ff;
1294
Søren Gjesse 2014/10/24 11:12:24 Comment that buffer starts as Uint8, but might cha
Lasse Reichstein Nielsen 2014/10/27 12:42:32 Comment added. We convert all non-BMP characters t
1295 List<int> buffer = new Uint8List(INITIAL_CAPACITY);
1296 int length = 0;
1297 int partialState = NO_PARTIAL;
1298 bool isLatin1 = true;
1299 // If allowing malformed, invalid UTF-8 sequences are converted to
1300 // U+FFFD.
1301 bool allowMalformed;
1302
1303 _Utf8StringBuffer(this.allowMalformed);
1304
1305 /**
1306 * Parse the continuation of a multi-byte UTF-8 sequence.
1307 *
1308 * Parse [utf8] from [position] to [end]. If the sequence extends beyond
1309 * `end`, store the partial state in [partialState], and continue from there
1310 * on the next added slice.
1311 *
1312 * The [size] is the number of expected continuation bytes total,
1313 * and [missing] is the number of remaining continuation bytes.
1314 * The [size] is used to detect overlong encodings.
1315 * The [value] is the value collected so far.
1316 *
1317 * When called after seeing the first multi-byte marker, the [size] and
1318 * [missing] values are always the same, but they may differ if continuing
1319 * after a partial sequence.
1320 */
1321 int addContinuation(List<int> utf8, int position, int end,
1322 int size, int missing, int value) {
1323 int codeEnd = position + missing;
1324 do {
1325 if (position == end) {
1326 missing = codeEnd - position;
1327 partialState =
1328 size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE);
1329 return end;
1330 }
1331 int char = utf8[position];
1332 if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) {
1333 if (allowMalformed) {
1334 addCharCode(0xFFFD);
1335 return position;
1336 }
1337 throw new FormatException("Expected UTF-8 continuation byte, "
1338 "found $char", utf8, position);
1339 }
1340 value = 64 * value + (char & MASK_CONTINUE_VALUE);
1341 position++;
1342 } while (position < codeEnd);
1343 if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) {
1344 // Over-long encoding.
1345 if (allowMalformed) {
1346 value = 0xFFFD;
1347 } else {
1348 throw new FormatException(
1349 "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}"
1350 " encoded in ${size + 1} bytes.", utf8, position - 1);
1351 }
1352 }
1353 addCharCode(value);
1354 return position;
1355 }
1356
1357 void addCharCode(int char) {
1358 assert(char >= 0);
1359 assert(char <= MAX_UNICODE);
1360 if (partialState != NO_PARTIAL) {
1361 if (allowMalformed) {
1362 partialState = NO_PARTIAL;
1363 addCharCode(0xFFFD);
1364 } else {
1365 throw new FormatException("Incomplete UTF-8 sequence", utf8);
1366 }
1367 }
1368 if (isLatin1 && char > 0xff) {
1369 _to16Bit(); // Also grows a little if close to full.
1370 }
1371 int length = this.length;
1372 if (char <= MAX_THREE_BYTE) {
1373 if (length == buffer.length) _grow();
1374 buffer[length] = char;
1375 this.length = length + 1;
1376 return;
1377 }
1378 if (length + 2 > buffer.length) _grow();
1379 int bits = char - 0x10000;
1380 buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE);
1381 buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE);
1382 this.length = length + 2;
1383 }
1384
1385 void _to16Bit() {
1386 assert(isLatin1);
1387 int newCapacity = buffer.length;
1388 if (newCapacity - length < INITIAL_CAPACITY) {
1389 newCapacity = length + INITIAL_CAPACITY;
1390 }
1391 Uint16List newBuffer = new Uint16List(newCapacity);
1392 newBuffer.setRange(0, length, buffer, 0);
1393 buffer = newBuffer;
1394 isLatin1 = false;
1395 }
1396
1397 void _grow() {
1398 int newCapacity = buffer.length * 2;
1399 List newBuffer;
1400 if (isLatin1) {
1401 newBuffer = new Uint8List(newCapacity);
1402 } else {
1403 newBuffer = new Uint16List(newCapacity);
1404 }
1405 newBuffer.setRange(0, length, buffer);
1406 buffer = newBuffer;
1407 }
1408
1409 void addSlice(List<int> utf8, int position, int end) {
1410 assert(position < end);
1411 if (partialState > 0) {
1412 int continueByteCount = (partialState & MASK_TWO_BIT);
1413 int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
1414 int value = partialState >> SHIFT_VALUE;
1415 partialState = NO_PARTIAL;
1416 position = addContinuation(utf8, position, end,
1417 continueByteCount, missing, value);
1418 if (position == end) return;
1419 }
1420 int index = length;
1421 int capacity = buffer.length;
1422 while (position < end) {
1423 int char = utf8[position];
1424 if (char <= MAX_ASCII) {
1425 if (index == capacity) _grow();
1426 buffer[index++] = char;
1427 position++;
1428 continue;
1429 }
1430 length = index;
1431 if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) {
1432 if (allowMalformed) {
1433 addCharCode(0xFFFD);
1434 position++;
1435 } else {
1436 throw new FormatException("Unexepected UTF-8 continuation byte",
1437 utf8, position);
1438 }
1439 } else if (char < 0xE0) { // C0-DF
1440 // Two-byte.
1441 position = addContinuation(utf8, position + 1, end, 1, 1,
1442 char & MASK_TWO_BYTE);
1443 } else if (char < 0xF0) { // E0-EF
1444 // Three-byte.
1445 position = addContinuation(utf8, position + 1, end, 2, 2,
1446 char & MASK_THREE_BYTE);
1447 } else if (char < 0xF8) { // F0-F7
1448 // Four-byte.
1449 position = addContinuation(utf8, position + 1, end, 3, 3,
1450 char & MASK_FOUR_BYTE);
1451 } else {
1452 if (allowMalformed) {
1453 addCharCode(0xFFFD);
1454 position++;
1455 } else {
1456 throw new FormatException("Invalid UTF-8 byte: $char",
1457 utf8, position);
1458 }
1459 }
1460 index = length;
1461 }
1462 length = index;
1463 }
1464
1465 String toString() {
1466 if (partialState != NO_PARTIAL) {
1467 if (allowMalformed) {
1468 partialState = NO_PARTIAL;
1469 addCharCode(0xFFFD);
1470 } else {
1471 int continueByteCount = (partialState & MASK_TWO_BIT);
1472 int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
1473 int value = partialState >> SHIFT_VALUE;
1474 int seenByteCount = continueByteCount - missing + 1;
1475 List source = new Uint8List(seenByteCount);
1476 while (seenByteCount > 1) {
1477 seenByteCount--;
1478 source[seenByteCount] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE);
1479 value >>= 6;
1480 }
1481 source[0] = value | (0x3c0 >> (continueByteCount - 1));
1482 throw new FormatException("Incomplete UTF-8 sequence",
1483 source, source.length);
1484 }
1485 }
1486 return new String.fromCharCodes(buffer, 0, length);
1487 }
1488 }
1489
1490 /**
1491 * Chunked JSON parser that parses UTF-8 chunks.
1492 */
1493 class _JsonUtf8Parser extends _ChunkedJsonParser {
1494 final bool allowMalformed;
1495 List<int> chunk;
1496 int chunkEnd;
1497
1498 _JsonUtf8Parser(_JsonListener listener, this.allowMalformed)
1499 : super(listener);
1500
1501 int getChar(int position) => chunk[position];
1502
1503 String getString(int start, int end) {
1504 beginString();
1505 addSliceToString(start, end);
1506 String result = endString();
1507 return result;
1508 }
1509
1510 void beginString() {
1511 this.buffer = new _Utf8StringBuffer(allowMalformed);
1512 }
1513
1514 void addSliceToString(int start, int end) {
1515 _Utf8StringBuffer buffer = this.buffer;
1516 buffer.addSlice(chunk, start, end);
1517 }
1518
1519 void addCharToString(int charCode) {
1520 _Utf8StringBuffer buffer = this.buffer;
1521 buffer.addCharCode(charCode);
1522 }
1523
1524 String endString() {
1525 _Utf8StringBuffer buffer = this.buffer;
1526 this.buffer = null;
1527 return buffer.toString();
1528 }
1529
1530 void copyCharsToList(int start, int end, List target, int offset) {
1531 int length = end - start;
1532 target.setRange(offset, offset + length, chunk, start);
1533 }
1534
1535 double parseDouble(int start, int end) {
1536 String string = getString(start, end);
1537 reutrn _parseDouble(string, 0, string.length);
1538 }
1539 }
1540
1541 double _parseDouble(String source, int start, int end)
1542 native "Double_parse";
1543
1544 /**
1545 * Implements the chunked conversion from a UTF-8 encoding of JSON
1546 * to its corresponding object.
1547 */
1548 class _JsonUtf8DecoderSink extends ByteConversionSinkBase {
1549 _ChunkedUtf8Parser _parser;
1550 final Sink<Object> _sink;
1551
1552 _JsonUtf8DecoderSink(reviver, this._sink, bool allowMalformed)
1553 : _parser = _createParser(reviver, allowMalformed);
1554
1555 static _ChunkedJsonParser _createParser(reviver, bool allowMalformed) {
1556 _BuildJsonListener listener;
1557 if (reviver == null) {
1558 listener = new _BuildJsonListener();
1559 } else {
1560 listener = new _ReviverJsonListener(reviver);
1561 }
1562 return new _JsonUtf8Parser(listener, allowMalformed);
1563 }
1564
1565 void addSlice(List<int> chunk, int start, int end, bool isLast) {
1566 _parser.chunk = chunk;
1567 _parser.chunkEnd = end;
1568 _parser.parse(start);
1569 if (isLast) _parser.close();
1570 }
1571
1572 void add(List<int> chunk) {
1573 addSlice(chunk, 0, chunk.length, false);
1574 }
1575
1576 void close() {
1577 _parser.close();
1578 var decoded = _parser.result;
1579 _sink.add(decoded);
1580 _sink.close();
1581 }
1582 }
OLDNEW
« no previous file with comments | « no previous file | runtime/lib/double_patch.dart » ('j') | tests/lib/convert/json_chunk_test.dart » ('J')

Powered by Google App Engine
This is Rietveld 408576698