| Index: runtime/lib/convert_patch.dart
|
| diff --git a/runtime/lib/convert_patch.dart b/runtime/lib/convert_patch.dart
|
| index 2a4ab1bc8af658b484fc94e8726a2c2afc8f8627..64795b8b4b8c4078d2fd1748908cb372da7184c4 100644
|
| --- a/runtime/lib/convert_patch.dart
|
| +++ b/runtime/lib/convert_patch.dart
|
| @@ -1,7 +1,9 @@
|
| -// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
|
| +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
|
| // for details. All rights reserved. Use of this source code is governed by a
|
| // BSD-style license that can be found in the LICENSE file.
|
|
|
| +import "dart:_internal" show POWERS_OF_TEN;
|
| +
|
| // JSON conversion.
|
|
|
| patch _parseJson(String json, reviver(var key, var value)) {
|
| @@ -11,7 +13,11 @@ patch _parseJson(String json, reviver(var key, var value)) {
|
| } else {
|
| listener = new _ReviverJsonListener(reviver);
|
| }
|
| - new _JsonParser(json, listener).parse();
|
| + var parser = new _JsonStringParser(listener);
|
| + parser.chunk = json;
|
| + parser.chunkEnd = json.length;
|
| + parser.parse(0);
|
| + parser.close();
|
| return listener.result;
|
| }
|
|
|
| @@ -19,6 +25,9 @@ patch _parseJson(String json, reviver(var key, var value)) {
|
|
|
| // Simple API for JSON parsing.
|
|
|
| +/**
|
| + * Listener for parsing events from [_ChunkedJsonParser].
|
| + */
|
| abstract class _JsonListener {
|
| void handleString(String value) {}
|
| void handleNumber(num value) {}
|
| @@ -34,7 +43,7 @@ abstract class _JsonListener {
|
| }
|
|
|
| /**
|
| - * A [JsonListener] that builds data objects from the parser events.
|
| + * A [_JsonListener] that builds data objects from the parser events.
|
| *
|
| * This is a simple stack-based object builder. It keeps the most recently
|
| * seen value in a variable, and uses it depending on the following event.
|
| @@ -135,7 +144,72 @@ class _ReviverJsonListener extends _BuildJsonListener {
|
| }
|
| }
|
|
|
| -class _JsonParser {
|
| +/**
|
| + * Buffer holding parts of a numeral.
|
| + *
|
| + * The buffer contains the characters of a JSON number.
|
| + * These are all ASCII, so an [Uint8List] is used as backing store.
|
| + *
|
| + * This buffer is used when a JSON number is split between separate chunks.
|
| + *
|
| + */
|
| +class _NumberBuffer {
|
| + static const int minCapacity = 16;
|
| + static const int kDefaultOverhead = 5;
|
| + Uint8List list;
|
| + int length = 0;
|
| + _NumberBuffer(int initialCapacity)
|
| + : list = new Uint8List(_initialCapacity(initialCapacity));
|
| +
|
| + int get capacity => list.length;
|
| +
|
| + // Pick an initial capacity greater than the first part's size.
|
| + // The typical use case has two parts, this is the attempt at
|
| + // guessing the size of the second part without overdoing it.
|
| + // The default estimate of the second part is [kDefaultOverhead],
|
| + // then round to multiplum of four, and return the result,
|
| + // or [minCapacity] if that is greater.
|
| + static int _initialCapacity(int minCapacity) {
|
| + minCapacity += kDefaultOverhead;
|
| + if (minCapacity < minCapacity) return minCapacity;
|
| + minCapacity = (minCapacity + 3) & ~3; // Round to multiple of four.
|
| + return minCapacity;
|
| + }
|
| +
|
| + // Grows to the exact size asked for.
|
| + void ensureCapacity(int newCapacity) {
|
| + Uint8List list = this.list;
|
| + if (newCapacity <= list.length) return;
|
| + Uint8List newList = new Uint8List(newCapacity);
|
| + newList.setRange(0, list.length, list, 0);
|
| + this.list = newList;
|
| + }
|
| +
|
| + String getString() {
|
| + var list = this.list;
|
| + if (length < list.length) {
|
| + list = new Uint8List.view(list.buffer, 0, length);
|
| + }
|
| + String result = new String.fromCharCodes(list);
|
| + return result;
|
| + }
|
| +
|
| + // TODO(lrn): See if parsing of numbers can be abstracted to something
|
| + // not only working on strings, but also on char-code lists, without lossing
|
| + // performance.
|
| + int parseInt() => int.parse(getString());
|
| + double parseDouble() => double.parse(getString());
|
| +}
|
| +
|
| +/**
|
| + * Chunked JSON parser.
|
| + *
|
| + * Receives inputs in chunks, gives access to individual parts of the input,
|
| + * and stores input state between chunks.
|
| + *
|
| + * Implementations include [String] and UTF-8 parsers.
|
| + */
|
| +abstract class _ChunkedJsonParser {
|
| // A simple non-recursive state-based parser for JSON.
|
| //
|
| // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON
|
| @@ -172,11 +246,11 @@ class _JsonParser {
|
| static const int NO_VALUES = 12;
|
|
|
| // Objects and arrays are "empty" until their first property/element.
|
| + // At this position, they may either have an entry or a close-bracket.
|
| static const int EMPTY = 0;
|
| static const int NON_EMPTY = 16;
|
| static const int EMPTY_MASK = 16; // Empty if zero.
|
|
|
| -
|
| static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY;
|
|
|
| // Actual states.
|
| @@ -226,18 +300,466 @@ class _JsonParser {
|
| static const int LBRACE = 0x7b;
|
| static const int RBRACE = 0x7d;
|
|
|
| - final String source;
|
| + // State of partial value at chunk split.
|
| + static const int NO_PARTIAL = 0;
|
| + static const int PARTIAL_STRING = 1;
|
| + static const int PARTIAL_NUMERAL = 2;
|
| + static const int PARTIAL_KEYWORD = 3;
|
| + static const int MASK_PARTIAL = 3;
|
| +
|
| + // Partial states for numerals. Values can be |'ed with PARTIAL_NUMERAL.
|
| + static const int NUM_SIGN = 0; // After initial '-'.
|
| + static const int NUM_ZERO = 4; // After '0' as first digit.
|
| + static const int NUM_DIGIT = 8; // After digit, no '.' or 'e' seen.
|
| + static const int NUM_DOT = 12; // After '.'.
|
| + static const int NUM_DOT_DIGIT = 16; // After a decimal digit (after '.').
|
| + static const int NUM_E = 20; // After 'e' or 'E'.
|
| + static const int NUM_E_SIGN = 24; // After '-' or '+' after 'e' or 'E'.
|
| + static const int NUM_E_DIGIT = 28; // After exponent digit.
|
| + static const int NUM_SUCCESS = 32; // Never stored as partial state.
|
| +
|
| + // Partial states for strings.
|
| + static const int STR_PLAIN = 0; // Inside string, but not escape.
|
| + static const int STR_ESCAPE = 4; // After '\'.
|
| + static const int STR_U = 16; // After '\u' and 0-3 hex digits.
|
| + static const int STR_U_COUNT_SHIFT = 2; // Hex digit count in bits 2-3.
|
| + static const int STR_U_VALUE_SHIFT = 5; // Hex digit value in bits 5+.
|
| +
|
| + // Partial states for keywords.
|
| + static const int KWD_TYPE_MASK = 12;
|
| + static const int KWD_TYPE_SHIFT = 2;
|
| + static const int KWD_NULL = 0; // Prefix of "null" seen.
|
| + static const int KWD_TRUE = 4; // Prefix of "true" seen.
|
| + static const int KWD_FALSE = 8; // Prefix of "false" seen.
|
| + static const int KWD_COUNT_SHIFT = 4; // Prefix length in bits 4+.
|
| +
|
| + // Mask used to mask off two lower bits.
|
| + static const int TWO_BIT_MASK = 3;
|
| +
|
| final _JsonListener listener;
|
| - _JsonParser(this.source, this.listener);
|
| -
|
| - /** Parses [source], or throws if it fails. */
|
| - void parse() {
|
| - final List<int> states = <int>[];
|
| - int state = STATE_INITIAL;
|
| - int position = 0;
|
| - int length = source.length;
|
| +
|
| + // The current parsing state.
|
| + int state = STATE_INITIAL;
|
| + List<int> states = <int>[];
|
| +
|
| + /**
|
| + * Stores tokenizer state between chunks.
|
| + *
|
| + * This state is stored when a chunk stops in the middle of a
|
| + * token (string, numeral, boolean or null).
|
| + *
|
| + * The partial state is used to continue parsing on the next chunk.
|
| + * The previous chunk is not retained, any data needed are stored in
|
| + * this integer, or in the [buffer] field as a string-building buffer
|
| + * or a [_NumberBuffer].
|
| + *
|
| + * Prefix state stored in [prefixState] as bits.
|
| + *
|
| + * ..00 : No partial value (NO_PARTIAL).
|
| + *
|
| + * ..00001 : Partial string, not inside escape.
|
| + * ..00101 : Partial string, after '\'.
|
| + * ..vvvv1dd01 : Partial \u escape.
|
| + * The 'dd' bits (2-3) encode the number of hex digits seen.
|
| + * Bits 5-16 encode the value of the hex digits seen so far.
|
| + *
|
| + * ..0ddd10 : Partial numeral.
|
| + * The `ddd` bits store the parts of in the numeral seen so
|
| + * far, as the constants `NUM_*` defined above.
|
| + * The characters of the numeral are stored in [buffer]
|
| + * as a [_NumberBuffer].
|
| + *
|
| + * ..0ddd0011 : Partial 'null' keyword.
|
| + * ..0ddd0111 : Partial 'true' keyword.
|
| + * ..0ddd1011 : Partial 'false' keyword.
|
| + * For all three keywords, the `ddd` bits encode the number
|
| + * of letters seen.
|
| + */
|
| + int partialState = NO_PARTIAL;
|
| +
|
| + /**
|
| + * Extra data stored while parsing a primitive value.
|
| + * May be set during parsing, always set at chunk end if a value is partial.
|
| + *
|
| + * May contain a string buffer while parsing strings.
|
| + */
|
| + var buffer = null;
|
| +
|
| + _ChunkedJsonParser(this.listener);
|
| +
|
| + /**
|
| + * Push the current parse [state] on a stack.
|
| + *
|
| + * State is pushed when a new array or object literal starts,
|
| + * so the parser can go back to the correct value when the literal ends.
|
| + */
|
| + void saveState(int state) {
|
| + states.add(state);
|
| + }
|
| +
|
| + /**
|
| + * Restore a state pushed with [saveState].
|
| + */
|
| + int restoreState() {
|
| + return states.removeLast(); // Throws if empty.
|
| + }
|
| +
|
| + /**
|
| + * Finalizes the parsing.
|
| + *
|
| + * Throws if the source read so far doesn't end up with a complete
|
| + * parsed value. That means it must not be inside a list or object
|
| + * literal, and any partial value read should also be a valid complete
|
| + * value.
|
| + *
|
| + * The only valid partial state is a number that ends in a digit, and
|
| + * only if the number is the entire JSON value being parsed
|
| + * (otherwise it would be inside a list or object).
|
| + * Such a number will be completed. Any other partial state is an error.
|
| + */
|
| + void close() {
|
| + if (partialState != NO_PARTIAL) {
|
| + int partialType = partialState & MASK_PARTIAL;
|
| + if (partialType == PARTIAL_NUMERAL) {
|
| + int numState = partialState & ~MASK_PARTIAL;
|
| + // A partial number might be a valid number if we know it's done.
|
| + // There is an unnecessary overhead if input is a single number,
|
| + // but this is assumed to be rare.
|
| + _NumberBuffer buffer = this.buffer;
|
| + this.buffer = null;
|
| + finishChunkNumber(numState, 0, 0, buffer);
|
| + } else if (partialType == PARTIAL_STRING) {
|
| + fail(chunkEnd, "Unterminated string");
|
| + } else {
|
| + assert(partialType == PARTIAL_KEYWORD);
|
| + fail(chunkEnd); // Incomplete literal.
|
| + }
|
| + }
|
| + if (state != STATE_END) {
|
| + fail(chunkEnd);
|
| + }
|
| + }
|
| +
|
| + /**
|
| + * Read out the result after successfully closing the parser.
|
| + *
|
| + * The parser is closed by calling [close] or calling [addSourceChunk] with
|
| + * `true` as second (`isLast`) argument.
|
| + */
|
| + Object get result {
|
| + return listener.result;
|
| + }
|
| +
|
| + /** Sets the current source chunk. */
|
| + void set chunk(var source);
|
| +
|
| + /**
|
| + * Length of current chunk.
|
| + *
|
| + * The valid arguments to [getChar] are 0 .. `chunkEnd - 1`.
|
| + */
|
| + int get chunkEnd;
|
| +
|
| + /**
|
| + * Returns the chunk itself.
|
| + *
|
| + * Only used by [fail] to include the chunk in the thrown [FormatException].
|
| + */
|
| + get chunk;
|
| +
|
| + /**
|
| + * Get charcacter/code unit of current chunk.
|
| + *
|
| + * The [index] must be non-negative and less than `chunkEnd`.
|
| + * In practive, [index] will be no smaller than the `start` argument passed
|
| + * to [parse].
|
| + */
|
| + int getChar(int index);
|
| +
|
| + /**
|
| + * Copy ASCII characters from start to end of chunk into a list.
|
| + *
|
| + * Used for number buffer (always copies ASCII, so encoding is not important).
|
| + */
|
| + void copyCharsToList(int start, int end, List<int> target);
|
| +
|
| + /**
|
| + * Build a string using input code units.
|
| + *
|
| + * Creates a string buffer and enables adding characters and slices
|
| + * to that buffer.
|
| + * The buffer is stored in the [buffer] field. If the string is unterminated,
|
| + * the same buffer is used to continue parsing in the next chunk.
|
| + */
|
| + void beginString();
|
| + /**
|
| + * Add single character code to string being built.
|
| + *
|
| + * Used for unparsed escape sequences.
|
| + */
|
| + void addCharToString(int charCode);
|
| +
|
| + /**
|
| + * Adds slice of current chunk to string being built.
|
| + *
|
| + * The [start] positions is inclusive, [end] is exclusive.
|
| + */
|
| + void addSliceToString(int start, int end);
|
| +
|
| + /** Finalizes the string being built and returns it as a String. */
|
| + String endString();
|
| +
|
| + /**
|
| + * Extracts a literal string from a slice of the current chunk.
|
| + *
|
| + * No interpretation of the content is performed, except for converting
|
| + * the source format to string.
|
| + * This can be implemented more or less efficiently depending on the
|
| + * underlying source.
|
| + *
|
| + * This is used for string literals that contain no escapes.
|
| + */
|
| + String getString(int start, int end);
|
| +
|
| + /**
|
| + * Parse a slice of the current chunk as an integer.
|
| + *
|
| + * The format is expected to be correct.
|
| + */
|
| + int parseInt(int start, int end) {
|
| + return int.parse(getString(start, end));
|
| + }
|
| +
|
| + /**
|
| + * Parse a slice of the current chunk as a double.
|
| + *
|
| + * The format is expected to be correct.
|
| + * This is used by [parseNumber] when the double value cannot be
|
| + * built exactly during parsing.
|
| + */
|
| + double parseDouble(int start, int end) {
|
| + return double.parse(getString(start, end));
|
| + }
|
| +
|
| + /**
|
| + * Create a _NumberBuffer containing the digits from [start] to [chunkEnd].
|
| + *
|
| + * This creates a number buffer and initializes it with the part of the
|
| + * number literal ending the current chunk
|
| + */
|
| + void createNumberBuffer(int start) {
|
| + assert(start >= 0);
|
| + assert(start < chunkEnd);
|
| + int length = chunkEnd - start;
|
| + var buffer = new _NumberBuffer(length);
|
| + copyCharsToList(start, chunkEnd, buffer.list);
|
| + buffer.length = length;
|
| + return buffer;
|
| + }
|
| +
|
| + /**
|
| + * Continues parsing a partial value.
|
| + */
|
| + int parsePartial(int position) {
|
| + if (position == chunkEnd) return position;
|
| + int partialState = this.partialState;
|
| + assert(partialState != NO_PARTIAL);
|
| + int partialType = partialState & MASK_PARTIAL;
|
| + this.partialState = NO_PARTIAL;
|
| + partialState = partialState & ~MASK_PARTIAL;
|
| + assert(partialType != 0);
|
| + if (partialType == PARTIAL_STRING) {
|
| + position = parsePartialString(position, partialState);
|
| + } else if (partialType == PARTIAL_NUMERAL) {
|
| + position = parsePartialNumber(position, partialState);
|
| + } else if (partialType == PARTIAL_KEYWORD) {
|
| + position = parsePartialKeyword(position, partialState);
|
| + }
|
| + return position;
|
| + }
|
| +
|
| + /**
|
| + * Parses the remainder of a number into the number buffer.
|
| + *
|
| + * Syntax is checked while pasing.
|
| + * Starts at position, which is expected to be the start of the chunk,
|
| + * and returns the index of the first non-number-literal character found,
|
| + * or chunkEnd if the entire chunk is a valid number continuation.
|
| + * Throws if a syntax error is detected.
|
| + */
|
| + int parsePartialNumber(int position, int state) {
|
| + int start = position;
|
| + // Primitive implementation, can be optimized.
|
| + _NumberBuffer buffer = this.buffer;
|
| + this.buffer = null;
|
| + int end = chunkEnd;
|
| + toBailout: {
|
| + if (position == end) break toBailout;
|
| + int char = getChar(position);
|
| + int digit = char ^ CHAR_0;
|
| + if (state == NUM_SIGN) {
|
| + if (digit <= 9) {
|
| + if (digit == 0) {
|
| + state = NUM_ZERO;
|
| + } else {
|
| + state = NUM_DIGIT;
|
| + }
|
| + position++;
|
| + if (position == end) break toBailout;
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + } else {
|
| + return fail(position);
|
| + }
|
| + }
|
| + if (state == NUM_ZERO) {
|
| + // JSON does not allow insignificant leading zeros (e.g., "09").
|
| + if (digit <= 9) return fail(position);
|
| + state = NUM_DIGIT;
|
| + }
|
| + while (state == NUM_DIGIT) {
|
| + if (digit > 9) {
|
| + if (char == DECIMALPOINT) {
|
| + state = NUM_DOT;
|
| + } else if ((char | 0x20) == CHAR_e) {
|
| + state = NUM_E;
|
| + } else {
|
| + finishChunkNumber(state, start, position, buffer);
|
| + return position;
|
| + }
|
| + }
|
| + position++;
|
| + if (position == end) break toBailout;
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + }
|
| + if (state == NUM_DOT) {
|
| + if (digit > 9) return fail(position);
|
| + state = NUM_DOT_DIGIT;
|
| + }
|
| + while (state == NUM_DOT_DIGIT) {
|
| + if (digit > 9) {
|
| + if ((char | 0x20) == CHAR_e) {
|
| + state = NUM_E;
|
| + } else {
|
| + finishChunkNumber(state, start, position, buffer);
|
| + return position;
|
| + }
|
| + }
|
| + position++;
|
| + if (position == end) break toBailout;
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + }
|
| + if (state == NUM_E) {
|
| + if (char == PLUS || char == MINUS) {
|
| + state = NUM_E_SIGN;
|
| + position++;
|
| + if (position == end) break toBailout;
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + }
|
| + }
|
| + assert(state >= NUM_E);
|
| + while (digit <= 9) {
|
| + state = NUM_E_DIGIT;
|
| + position++;
|
| + if (position == end) break toBailout;
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + }
|
| + finishChunkNumber(state, start, position, buffer);
|
| + return position;
|
| + }
|
| + // Bailout code in case the current chunk ends while parsing the numeral.
|
| + assert(position == end);
|
| + continueChunkNumber(state, start, buffer);
|
| + return chunkEnd;
|
| + }
|
| +
|
| + /**
|
| + * Continues parsing a partial string literal.
|
| + *
|
| + * Handles partial escapes and then hands the parsing off to
|
| + * [parseStringToBuffer].
|
| + */
|
| + int parsePartialString(int position, int partialState) {
|
| + if (partialState == STR_PLAIN) {
|
| + return parseStringToBuffer(position);
|
| + }
|
| + if (partialState == STR_ESCAPE) {
|
| + position = parseStringEscape(position);
|
| + // parseStringEscape sets partialState if it sees the end.
|
| + if (position == chunkEnd) return position;
|
| + return parseStringToBuffer(position);
|
| + }
|
| + assert((partialState & STR_U) != 0);
|
| + int value = partialState >> STR_U_VALUE_SHIFT;
|
| + int count = (partialState >> STR_U_COUNT_SHIFT) & TWO_BIT_MASK;
|
| + for (int i = count; i < 4; i++, position++) {
|
| + if (position == chunkEnd) return chunkStringEscapeU(i, value);
|
| + int char = getChar(position);
|
| + int digit = parseHexDigit(char);
|
| + if (digit < 0) fail(position, "Invalid hex digit");
|
| + value = 16 * value + digit;
|
| + }
|
| + addCharToString(value);
|
| + return parseStringToBuffer(position);
|
| + }
|
| +
|
| + /**
|
| + * Continues parsing a partial keyword.
|
| + */
|
| + int parsePartialKeyword(int position, int partialState) {
|
| + int keywordType = partialState & KWD_TYPE_MASK;
|
| + int count = partialState >> KWD_COUNT_SHIFT;
|
| + int keywordTypeIndex = keywordType >> KWD_TYPE_SHIFT;
|
| + String keyword = const ["null", "true", "false"][keywordTypeIndex];
|
| + assert(count < keyword.length);
|
| + do {
|
| + if (position == chunkEnd) {
|
| + this.partialState =
|
| + PARTIAL_KEYWORD | keywordType | (count << KWD_COUNT_SHIFT);
|
| + return chunkEnd;
|
| + }
|
| + int expectedChar = keyword.codeUnitAt(count);
|
| + if (getChar(position) != expectedChar) return fail(position);
|
| + position++;
|
| + count++;
|
| + } while (count < keyword.length);
|
| + if (keywordType == KWD_NULL) {
|
| + listener.handleNull();
|
| + } else {
|
| + listener.handleBool(keywordType == KWD_TRUE);
|
| + }
|
| + return position;
|
| + }
|
| +
|
| + /** Convert hex-digit to its value. Returns -1 if char is not a hex digit. */
|
| + int parseHexDigit(int char) {
|
| + int digit = char ^ 0x30;
|
| + if (digit <= 9) return digit;
|
| + int letter = (char | 0x20) ^ 0x60;
|
| + // values 1 .. 6 are 'a' through 'f'
|
| + if (letter <= 6 && letter > 0) return letter + 9;
|
| + return -1;
|
| + }
|
| +
|
| + /**
|
| + * Parses the current chunk as a chunk of JSON.
|
| + *
|
| + * Starts parsing at [position] and continues until [chunkEnd].
|
| + * Continues parsing where the previous chunk (if any) ended.
|
| + */
|
| + void parse(int position) {
|
| + int length = chunkEnd;
|
| + if (partialState != NO_PARTIAL) {
|
| + position = parsePartial(position);
|
| + if (position == length) return;
|
| + }
|
| + int state = this.state;
|
| while (position < length) {
|
| - int char = source.codeUnitAt(position);
|
| + int char = getChar(position);
|
| switch (char) {
|
| case SPACE:
|
| case CARRIAGE_RETURN:
|
| @@ -246,41 +768,41 @@ class _JsonParser {
|
| position++;
|
| break;
|
| case QUOTE:
|
| - if ((state & ALLOW_STRING_MASK) != 0) fail(position);
|
| - position = parseString(position + 1);
|
| + if ((state & ALLOW_STRING_MASK) != 0) return fail(position);
|
| state |= VALUE_READ_BITS;
|
| + position = parseString(position + 1);
|
| break;
|
| case LBRACKET:
|
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
|
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
|
| listener.beginArray();
|
| - states.add(state);
|
| + saveState(state);
|
| state = STATE_ARRAY_EMPTY;
|
| position++;
|
| break;
|
| case LBRACE:
|
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
|
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
|
| listener.beginObject();
|
| - states.add(state);
|
| + saveState(state);
|
| state = STATE_OBJECT_EMPTY;
|
| position++;
|
| break;
|
| case CHAR_n:
|
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
|
| - position = parseNull(position);
|
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
|
| state |= VALUE_READ_BITS;
|
| + position = parseNull(position);
|
| break;
|
| case CHAR_f:
|
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
|
| - position = parseFalse(position);
|
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
|
| state |= VALUE_READ_BITS;
|
| + position = parseFalse(position);
|
| break;
|
| case CHAR_t:
|
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
|
| - position = parseTrue(position);
|
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
|
| state |= VALUE_READ_BITS;
|
| + position = parseTrue(position);
|
| break;
|
| case COLON:
|
| - if (state != STATE_OBJECT_KEY) fail(position);
|
| + if (state != STATE_OBJECT_KEY) return fail(position);
|
| listener.propertyName();
|
| state = STATE_OBJECT_COLON;
|
| position++;
|
| @@ -295,7 +817,7 @@ class _JsonParser {
|
| state = STATE_ARRAY_COMMA;
|
| position++;
|
| } else {
|
| - fail(position);
|
| + return fail(position);
|
| }
|
| break;
|
| case RBRACKET:
|
| @@ -305,9 +827,9 @@ class _JsonParser {
|
| listener.arrayElement();
|
| listener.endArray();
|
| } else {
|
| - fail(position);
|
| + return fail(position);
|
| }
|
| - state = states.removeLast() | VALUE_READ_BITS;
|
| + state = restoreState() | VALUE_READ_BITS;
|
| position++;
|
| break;
|
| case RBRACE:
|
| @@ -317,19 +839,19 @@ class _JsonParser {
|
| listener.propertyValue();
|
| listener.endObject();
|
| } else {
|
| - fail(position);
|
| + return fail(position);
|
| }
|
| - state = states.removeLast() | VALUE_READ_BITS;
|
| + state = restoreState() | VALUE_READ_BITS;
|
| position++;
|
| break;
|
| default:
|
| if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
|
| - position = parseNumber(char, position);
|
| state |= VALUE_READ_BITS;
|
| + position = parseNumber(char, position);
|
| break;
|
| }
|
| }
|
| - if (state != STATE_END) fail(position);
|
| + this.state = state;
|
| }
|
|
|
| /**
|
| @@ -338,12 +860,14 @@ class _JsonParser {
|
| * [:source[position]:] must be "t".
|
| */
|
| int parseTrue(int position) {
|
| - assert(source.codeUnitAt(position) == CHAR_t);
|
| - if (source.length < position + 4) fail(position, "Unexpected identifier");
|
| - if (source.codeUnitAt(position + 1) != CHAR_r ||
|
| - source.codeUnitAt(position + 2) != CHAR_u ||
|
| - source.codeUnitAt(position + 3) != CHAR_e) {
|
| - fail(position);
|
| + assert(getChar(position) == CHAR_t);
|
| + if (chunkEnd < position + 4) {
|
| + return parseKeywordPrefix(position, "true", KWD_TRUE);
|
| + }
|
| + if (getChar(position + 1) != CHAR_r ||
|
| + getChar(position + 2) != CHAR_u ||
|
| + getChar(position + 3) != CHAR_e) {
|
| + return fail(position);
|
| }
|
| listener.handleBool(true);
|
| return position + 4;
|
| @@ -355,13 +879,15 @@ class _JsonParser {
|
| * [:source[position]:] must be "f".
|
| */
|
| int parseFalse(int position) {
|
| - assert(source.codeUnitAt(position) == CHAR_f);
|
| - if (source.length < position + 5) fail(position, "Unexpected identifier");
|
| - if (source.codeUnitAt(position + 1) != CHAR_a ||
|
| - source.codeUnitAt(position + 2) != CHAR_l ||
|
| - source.codeUnitAt(position + 3) != CHAR_s ||
|
| - source.codeUnitAt(position + 4) != CHAR_e) {
|
| - fail(position);
|
| + assert(getChar(position) == CHAR_f);
|
| + if (chunkEnd < position + 5) {
|
| + return parseKeywordPrefix(position, "false", KWD_FALSE);
|
| + }
|
| + if (getChar(position + 1) != CHAR_a ||
|
| + getChar(position + 2) != CHAR_l ||
|
| + getChar(position + 3) != CHAR_s ||
|
| + getChar(position + 4) != CHAR_e) {
|
| + return fail(position);
|
| }
|
| listener.handleBool(false);
|
| return position + 5;
|
| @@ -373,17 +899,33 @@ class _JsonParser {
|
| * [:source[position]:] must be "n".
|
| */
|
| int parseNull(int position) {
|
| - assert(source.codeUnitAt(position) == CHAR_n);
|
| - if (source.length < position + 4) fail(position, "Unexpected identifier");
|
| - if (source.codeUnitAt(position + 1) != CHAR_u ||
|
| - source.codeUnitAt(position + 2) != CHAR_l ||
|
| - source.codeUnitAt(position + 3) != CHAR_l) {
|
| - fail(position);
|
| + assert(getChar(position) == CHAR_n);
|
| + if (chunkEnd < position + 4) {
|
| + return parseKeywordPrefix(position, "null", KWD_NULL);
|
| + }
|
| + if (getChar(position + 1) != CHAR_u ||
|
| + getChar(position + 2) != CHAR_l ||
|
| + getChar(position + 3) != CHAR_l) {
|
| + return fail(position);
|
| }
|
| listener.handleNull();
|
| return position + 4;
|
| }
|
|
|
| + int parseKeywordPrefix(int position, String chars, int type) {
|
| + assert(getChar(position) == chars.codeUnitAt(0));
|
| + int length = chunkEnd;
|
| + int start = position;
|
| + int count = 1;
|
| + while (++position < length) {
|
| + int char = getChar(position);
|
| + if (char != chars.codeUnitAt(count)) return fail(start);
|
| + count++;
|
| + }
|
| + this.partialState = PARTIAL_KEYWORD | type | (count << KWD_COUNT_SHIFT);
|
| + return length;
|
| + }
|
| +
|
| /**
|
| * Parses a string value.
|
| *
|
| @@ -394,92 +936,207 @@ class _JsonParser {
|
| // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"'
|
| // Initial position is right after first '"'.
|
| int start = position;
|
| - while (position < source.length) {
|
| - int char = source.codeUnitAt(position++);
|
| + int end = chunkEnd;
|
| + while (position < end) {
|
| + int char = getChar(position++);
|
| // BACKSLASH is larger than QUOTE and SPACE.
|
| if (char > BACKSLASH) {
|
| continue;
|
| }
|
| if (char == BACKSLASH) {
|
| - return parseStringWithEscapes(start, position - 1);
|
| + beginString();
|
| + addSliceToString(start, position - 1);
|
| + return parseStringToBuffer(position - 1);
|
| }
|
| if (char == QUOTE) {
|
| - listener.handleString(source.substring(start, position - 1));
|
| + listener.handleString(getString(start, position - 1));
|
| return position;
|
| }
|
| if (char < SPACE) {
|
| fail(position - 1, "Control character in string");
|
| }
|
| }
|
| - fail(start - 1, "Unterminated string");
|
| + beginString();
|
| + addSliceToString(start, end);
|
| + return chunkString(STR_PLAIN);
|
| }
|
|
|
| - int parseStringWithEscapes(start, position) {
|
| - // Backslash escape detected. Collect character codes for rest of string.
|
| - int firstEscape = position;
|
| - List<int> chars = <int>[];
|
| - for (int i = start; i < firstEscape; i++) {
|
| - chars.add(source.codeUnitAt(i));
|
| - }
|
| - position++;
|
| + /**
|
| + * Sets up a partial string state.
|
| + *
|
| + * The state is either not inside an escape, or right after a backslash.
|
| + * For partial strings ending inside a Unicode escape, use
|
| + * [chunkStringEscapeU].
|
| + */
|
| + int chunkString(int stringState) {
|
| + partialState = PARTIAL_STRING | stringState;
|
| + return chunkEnd;
|
| + }
|
| +
|
| + /**
|
| + * Sets up a partial string state for a partially parsed Unicode escape.
|
| + *
|
| + * The partial string state includes the current [buffer] and the
|
| + * number of hex digits of the Unicode seen so far (e.g., for `"\u30')
|
| + * the state knows that two digits have been seen, and what their value is.
|
| + *
|
| + * Returns [chunkEnd] so it can be used as part of a return statement.
|
| + */
|
| + int chunkStringEscapeU(int count, int value) {
|
| + partialState = PARTIAL_STRING | STR_U |
|
| + (count << STR_U_COUNT_SHIFT) |
|
| + (value << STR_U_VALUE_SHIFT);
|
| + return chunkEnd;
|
| + }
|
| +
|
| + /**
|
| + * Parses the remainder of a string literal into a buffer.
|
| + *
|
| + * The buffer is stored in [buffer] and its underlying format depends on
|
| + * the input chunk type. For example UTF-8 decoding happens in the
|
| + * buffer, not in the parser, since all significant JSON characters are ASCII.
|
| + *
|
| + * This function scans through the string literal for escapes, and copies
|
| + * slices of non-escape characters using [addSliceToString].
|
| + */
|
| + int parseStringToBuffer(position) {
|
| + int end = chunkEnd;
|
| + int start = position;
|
| while (true) {
|
| - if (position == source.length) {
|
| - fail(start - 1, "Unterminated string");
|
| + if (position == end) {
|
| + if (position > start) {
|
| + addSliceToString(start, position);
|
| + }
|
| + return chunkString(STR_PLAIN);
|
| }
|
| - int char = source.codeUnitAt(position);
|
| - switch (char) {
|
| - case CHAR_b: char = BACKSPACE; break;
|
| - case CHAR_f: char = FORM_FEED; break;
|
| - case CHAR_n: char = NEWLINE; break;
|
| - case CHAR_r: char = CARRIAGE_RETURN; break;
|
| - case CHAR_t: char = TAB; break;
|
| - case SLASH:
|
| - case BACKSLASH:
|
| - case QUOTE:
|
| - break;
|
| - case CHAR_u:
|
| - int hexStart = position - 1;
|
| - int value = 0;
|
| - for (int i = 0; i < 4; i++) {
|
| - position++;
|
| - if (position == source.length) {
|
| - fail(start - 1, "Unterminated string");
|
| - }
|
| - char = source.codeUnitAt(position);
|
| - char -= 0x30;
|
| - if (char < 0) fail(hexStart, "Invalid unicode escape");
|
| - if (char < 10) {
|
| - value = value * 16 + char;
|
| - } else {
|
| - char = (char | 0x20) - 0x31;
|
| - if (char < 0 || char > 5) {
|
| - fail(hexStart, "Invalid unicode escape");
|
| - }
|
| - value = value * 16 + char + 10;
|
| - }
|
| - }
|
| - char = value;
|
| - break;
|
| - default:
|
| - if (char < SPACE) fail(position, "Control character in string");
|
| - fail(position, "Unrecognized string escape");
|
| + int char = getChar(position++);
|
| + if (char > BACKSLASH) continue;
|
| + if (char < SPACE) {
|
| + fail(position - 1); // Control character in string.
|
| + return;
|
| }
|
| - do {
|
| - chars.add(char);
|
| - position++;
|
| - if (position == source.length) fail(start - 1, "Unterminated string");
|
| - char = source.codeUnitAt(position);
|
| - if (char == QUOTE) {
|
| - String result = new String.fromCharCodes(chars);
|
| - listener.handleString(result);
|
| - return position + 1;
|
| + if (char == QUOTE) {
|
| + int quotePosition = position - 1;
|
| + if (quotePosition > start) {
|
| + addSliceToString(start, quotePosition);
|
| }
|
| - if (char < SPACE) {
|
| - fail(position, "Control character in string");
|
| + listener.handleString(endString());
|
| + return position;
|
| + }
|
| + if (char != BACKSLASH) {
|
| + continue;
|
| + }
|
| + // Handle escape.
|
| + if (position - 1 > start) {
|
| + addSliceToString(start, position - 1);
|
| + }
|
| + if (position == end) return chunkString(STR_ESCAPE);
|
| + position = parseStringEscape(position);
|
| + if (position == end) return position;
|
| + start = position;
|
| + }
|
| + return -1; // UNREACHABLE.
|
| + }
|
| +
|
| + /**
|
| + * Parse a string escape.
|
| + *
|
| + * Position is right after the initial backslash.
|
| + * The following escape is parsed into a character code which is added to
|
| + * the current string buffer using [addCharToString].
|
| + *
|
| + * Returns position after the last character of the escape.
|
| + */
|
| + int parseStringEscape(int position) {
|
| + int char = getChar(position++);
|
| + int length = chunkEnd;
|
| + switch (char) {
|
| + case CHAR_b: char = BACKSPACE; break;
|
| + case CHAR_f: char = FORM_FEED; break;
|
| + case CHAR_n: char = NEWLINE; break;
|
| + case CHAR_r: char = CARRIAGE_RETURN; break;
|
| + case CHAR_t: char = TAB; break;
|
| + case SLASH:
|
| + case BACKSLASH:
|
| + case QUOTE:
|
| + break;
|
| + case CHAR_u:
|
| + int hexStart = position - 1;
|
| + int value = 0;
|
| + for (int i = 0; i < 4; i++) {
|
| + if (position == length) return chunkStringEscapeU(i, value);
|
| + char = getChar(position++);
|
| + int digit = char ^ 0x30;
|
| + value *= 16;
|
| + if (digit <= 9) {
|
| + value += digit;
|
| + } else {
|
| + digit = (char | 0x20) - CHAR_a;
|
| + if (digit < 0 || digit > 5) {
|
| + return fail(hexStart, "Invalid unicode escape");
|
| + }
|
| + value += digit + 10;
|
| + }
|
| }
|
| - } while (char != BACKSLASH);
|
| - position++;
|
| + char = value;
|
| + break;
|
| + default:
|
| + if (char < SPACE) return fail(position, "Control character in string");
|
| + return fail(position, "Unrecognized string escape");
|
| }
|
| + addCharToString(char);
|
| + if (position == length) return chunkString(STR_PLAIN);
|
| + return position;
|
| + }
|
| +
|
| + /// Sets up a partial numeral state.
|
| + /// Returns chunkEnd to allow easy one-line bailout tests.
|
| + int beginChunkNumber(int state, int start) {
|
| + int end = chunkEnd;
|
| + int length = end - start;
|
| + var buffer = new _NumberBuffer(length);
|
| + copyCharsToList(start, end, buffer.list, 0);
|
| + buffer.length = length;
|
| + this.buffer = buffer;
|
| + this.partialState = PARTIAL_NUMERAL | state;
|
| + return end;
|
| + }
|
| +
|
| + void addNumberChunk(_NumberBuffer buffer, int start, int end, int overhead) {
|
| + int length = end - start;
|
| + int count = buffer.length;
|
| + int newCount = count + length;
|
| + int newCapacity = newCount + overhead;
|
| + buffer.ensureCapacity(newCapacity);
|
| + copyCharsToList(start, end, buffer.list, count);
|
| + buffer.length = newCount;
|
| + }
|
| +
|
| + // Continues an already chunked number accross an entire chunk.
|
| + int continueChunkNumber(int state, int start, _NumberBuffer buffer) {
|
| + int end = chunkEnd;
|
| + addNumberChunk(buffer, start, end, _NumberBuffer.kDefaultOverhead);
|
| + this.buffer = buffer;
|
| + this.partialState = PARTIAL_NUMERAL | state;
|
| + return end;
|
| + }
|
| +
|
| + int finishChunkNumber(int state, int start, int end, _NumberBuffer buffer) {
|
| + if (state == NUM_ZERO) {
|
| + listener.handleNumber(0);
|
| + return;
|
| + }
|
| + if (end > start) {
|
| + addNumberChunk(buffer, start, end, 0);
|
| + }
|
| + if (state == NUM_DIGIT) {
|
| + listener.handleNumber(buffer.parseInt());
|
| + } else if (state == NUM_DOT_DIGIT || state == NUM_E_DIGIT) {
|
| + listener.handleNumber(buffer.parseDouble());
|
| + } else {
|
| + fail(chunkEnd, "Unterminated number literal");
|
| + }
|
| + return end;
|
| }
|
|
|
| int parseNumber(int char, int position) {
|
| @@ -487,89 +1144,576 @@ class _JsonParser {
|
| // Format:
|
| // '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)?
|
| int start = position;
|
| - int length = source.length;
|
| - int intValue = 0; // Collect int value while parsing.
|
| - int intSign = 1;
|
| + int length = chunkEnd;
|
| + // Collects an int value while parsing. Used for both an integer literal,
|
| + // an the exponent part of a double literal.
|
| + int intValue = 0;
|
| + double doubleValue = 0.0; // Collect double value while parsing.
|
| + int sign = 1;
|
| bool isDouble = false;
|
| // Break this block when the end of the number literal is reached.
|
| // At that time, position points to the next character, and isDouble
|
| // is set if the literal contains a decimal point or an exponential.
|
| parsing: {
|
| if (char == MINUS) {
|
| - intSign = -1;
|
| + sign = -1;
|
| position++;
|
| - if (position == length) fail(position, "Missing expected digit");
|
| - char = source.codeUnitAt(position);
|
| + if (position == length) return beginChunkNumber(NUM_SIGN, start);
|
| + char = getChar(position);
|
| }
|
| - if (char < CHAR_0 || char > CHAR_9) {
|
| - if (intSign < 0) {
|
| + int digit = char ^ CHAR_0;
|
| + if (digit > 9) {
|
| + if (sign < 0) {
|
| fail(position, "Missing expected digit");
|
| } else {
|
| // If it doesn't even start out as a numeral.
|
| fail(position, "Unexpected character");
|
| }
|
| }
|
| - if (char == CHAR_0) {
|
| + if (digit == 0) {
|
| position++;
|
| - if (position == length) break parsing;
|
| - char = source.codeUnitAt(position);
|
| - if (CHAR_0 <= char && char <= CHAR_9) {
|
| - fail(position);
|
| - }
|
| + if (position == length) return beginChunkNumber(NUM_ZERO, start);
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + // If starting with zero, next character must not be digit.
|
| + if (digit <= 9) fail(position);
|
| } else {
|
| do {
|
| - intValue = intValue * 10 + (char - CHAR_0);
|
| + intValue = 10 * intValue + digit;
|
| position++;
|
| - if (position == length) break parsing;
|
| - char = source.codeUnitAt(position);
|
| - } while (CHAR_0 <= char && char <= CHAR_9);
|
| + if (position == length) return beginChunkNumber(NUM_DIGIT, start);
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + } while (digit <= 9);
|
| }
|
| if (char == DECIMALPOINT) {
|
| isDouble = true;
|
| + doubleValue = intValue.toDouble();
|
| + intValue = 0;
|
| position++;
|
| - if (position == length) fail(position, "Missing expected digit");
|
| - char = source.codeUnitAt(position);
|
| - if (char < CHAR_0 || char > CHAR_9) fail(position);
|
| + if (position == length) return beginChunkNumber(NUM_DOT, start);
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + if (digit > 9) fail(position);
|
| do {
|
| + doubleValue = 10.0 * doubleValue + digit;
|
| + intValue -= 1;
|
| position++;
|
| - if (position == length) break parsing;
|
| - char = source.codeUnitAt(position);
|
| - } while (CHAR_0 <= char && char <= CHAR_9);
|
| + if (position == length) return beginChunkNumber(NUM_DOT_DIGIT, start);
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + } while (digit <= 9);
|
| }
|
| - if (char == CHAR_e || char == CHAR_E) {
|
| - isDouble = true;
|
| + if ((char | 0x20) == CHAR_e) {
|
| + if (!isDouble) {
|
| + doubleValue = intValue.toDouble();
|
| + intValue = 0;
|
| + isDouble = true;
|
| + }
|
| position++;
|
| - if (position == length) fail(position, "Missing expected digit");
|
| - char = source.codeUnitAt(position);
|
| + if (position == length) return beginChunkNumber(NUM_E, start);
|
| + char = getChar(position);
|
| + int expSign = 1;
|
| + int exponent = 0;
|
| if (char == PLUS || char == MINUS) {
|
| + expSign = 0x2C - char; // -1 for MINUS, +1 for PLUS
|
| position++;
|
| - if (position == length) fail(position, "Missing expected digit");
|
| - char = source.codeUnitAt(position);
|
| + if (position == length) return beginChunkNumber(NUM_E_SIGN, start);
|
| + char = getChar(position);
|
| }
|
| - if (char < CHAR_0 || char > CHAR_9) {
|
| + digit = char ^ CHAR_0;
|
| + if (digit > 9) {
|
| fail(position, "Missing expected digit");
|
| }
|
| do {
|
| + exponent = 10 * exponent + digit;
|
| position++;
|
| - if (position == length) break parsing;
|
| - char = source.codeUnitAt(position);
|
| - } while (CHAR_0 <= char && char <= CHAR_9);
|
| + if (position == length) return beginChunkNumber(NUM_E_DIGIT, start);
|
| + char = getChar(position);
|
| + digit = char ^ CHAR_0;
|
| + } while (digit <= 9);
|
| + intValue += expSign * exponent;
|
| }
|
| }
|
| if (!isDouble) {
|
| - listener.handleNumber(intSign * intValue);
|
| + listener.handleNumber(sign * intValue);
|
| return position;
|
| }
|
| - // This correctly creates -0.0 for doubles.
|
| - listener.handleNumber(_parseDouble(source, start, position));
|
| + // Double values at or above this value (2**53) may have lost precission.
|
| + // Only trust results that are below this value.
|
| + const double maxExactDouble = 9007199254740992.0;
|
| + if (doubleValue < maxExactDouble) {
|
| + int exponent = intValue;
|
| + double signedMantissa = doubleValue * sign;
|
| + if (exponent >= -22) {
|
| + if (exponent < 0) {
|
| + listener.handleNumber(signedMantissa / POWERS_OF_TEN[-exponent]);
|
| + return position;
|
| + }
|
| + if (exponent == 0) {
|
| + listener.handleNumber(signedMantissa);
|
| + return position;
|
| + }
|
| + if (exponent <= 22) {
|
| + listener.handleNumber(signedMantissa * POWERS_OF_TEN[exponent]);
|
| + return position;
|
| + }
|
| + }
|
| + }
|
| + // If the value is outside the range +/-maxExactDouble or
|
| + // exponent is outside the range +/-22, then we can't trust simple double
|
| + // arithmetic to get the exact result, so we use the system double parsing.
|
| + listener.handleNumber(parseDouble(start, position));
|
| return position;
|
| }
|
|
|
| - static double _parseDouble(String source, int start, int end)
|
| - native "Double_parse";
|
| + int fail(int position, [String message]) {
|
| + if (message == null) {
|
| + message = "Unexpected character";
|
| + if (position == chunkEnd) message = "Unexpected end of input";
|
| + }
|
| + throw new FormatException(message, chunk, position);
|
| + }
|
| +}
|
| +
|
| +/**
|
| + * Chunked JSON parser that parses [String] chunks.
|
| + */
|
| +class _JsonStringParser extends _ChunkedJsonParser {
|
| + String chunk;
|
| + int chunkEnd;
|
| +
|
| + _JsonStringParser(_JsonListener listener) : super(listener);
|
| +
|
| + int getChar(int position) => chunk.codeUnitAt(position);
|
| +
|
| + String getString(int start, int end) {
|
| + return chunk.substring(start, end);
|
| + }
|
| +
|
| + void beginString() {
|
| + this.buffer = new StringBuffer();
|
| + }
|
| +
|
| + void addSliceToString(int start, int end) {
|
| + StringBuffer buffer = this.buffer;
|
| + buffer.write(chunk.substring(start, end));
|
| + }
|
| +
|
| + void addCharToString(int charCode) {
|
| + StringBuffer buffer = this.buffer;
|
| + buffer.writeCharCode(charCode);
|
| + }
|
| +
|
| + String endString() {
|
| + StringBuffer buffer = this.buffer;
|
| + this.buffer = null;
|
| + return buffer.toString();
|
| + }
|
| +
|
| + void copyCharsToList(int start, int end, List target, int offset) {
|
| + int length = end - start;
|
| + for (int i = 0; i < length; i++) {
|
| + target[offset + i] = chunk.codeUnitAt(start + i);
|
| + }
|
| + }
|
| +
|
| + double parseDouble(int start, int end) {
|
| + return _parseDouble(chunk, start, end);
|
| + }
|
| +}
|
| +
|
| +patch class JsonDecoder {
|
| + /* patch */ StringConversionSink startChunkedConversion(Sink<Object> sink) {
|
| + return new _JsonStringDecoderSink(this._reviver, sink);
|
| + }
|
| +}
|
| +
|
| +/**
|
| + * Implements the chunked conversion from a JSON string to its corresponding
|
| + * object.
|
| + *
|
| + * The sink only creates one object, but its input can be chunked.
|
| + */
|
| +class _JsonStringDecoderSink extends StringConversionSinkBase {
|
| + _ChunkedJsonParser _parser;
|
| + Function _reviver;
|
| + final Sink<Object> _sink;
|
| +
|
| + _JsonStringDecoderSink(reviver, this._sink)
|
| + : _reviver = reviver, _parser = _createParser(reviver);
|
| +
|
| + static _ChunkedJsonParser _createParser(reviver) {
|
| + _BuildJsonListener listener;
|
| + if (reviver == null) {
|
| + listener = new _BuildJsonListener();
|
| + } else {
|
| + listener = new _ReviverJsonListener(reviver);
|
| + }
|
| + return new _JsonStringParser(listener);
|
| + }
|
| +
|
| + void addSlice(String chunk, int start, int end, bool isLast) {
|
| + _parser.chunk = chunk;
|
| + _parser.chunkEnd = end;
|
| + _parser.parse(start);
|
| + if (isLast) _parser.close();
|
| + }
|
| +
|
| + void add(String chunk) {
|
| + addSlice(chunk, 0, chunk.length, false);
|
| + }
|
| +
|
| + void close() {
|
| + _parser.close();
|
| + var decoded = _parser.result;
|
| + _sink.add(decoded);
|
| + _sink.close();
|
| + }
|
| +
|
| + Utf8ConversionSink asUtf8Sink(bool allowMalformed) {
|
| + _parser = null;
|
| + return new _JsonUtf8DecoderSink(_reviver, _sink, allowMalformed);
|
| + }
|
| +}
|
| +
|
| +class _Utf8StringBuffer {
|
| + static const int INITIAL_CAPACITY = 32;
|
| + // Partial state encoding.
|
| + static const int MASK_TWO_BIT = 0x03;
|
| + static const int MASK_SIZE = MASK_TWO_BIT;
|
| + static const int SHIFT_MISSING = 2;
|
| + static const int SHIFT_VALUE = 4;
|
| + static const int NO_PARTIAL = 0;
|
| +
|
| + // UTF-8 encoding and limits.
|
| + static const int MAX_ASCII = 127;
|
| + static const int MAX_TWO_BYTE = 0x7ff;
|
| + static const int MAX_THREE_BYTE = 0xffff;
|
| + static const int MAX_UNICODE = 0X10ffff;
|
| + static const int MASK_TWO_BYTE = 0x1f;
|
| + static const int MASK_THREE_BYTE = 0x0f;
|
| + static const int MASK_FOUR_BYTE = 0x07;
|
| + static const int MASK_CONTINUE_TAG = 0xC0;
|
| + static const int MASK_CONTINUE_VALUE = 0x3f;
|
| + static const int CONTINUE_TAG = 0x80;
|
| +
|
| + // UTF-16 surrogate encoding.
|
| + static const int LEAD_SURROGATE = 0xD800;
|
| + static const int TAIL_SURROGATE = 0xDC00;
|
| + static const int SHIFT_HIGH_SURROGATE = 10;
|
| + static const int MASK_LOW_SURROGATE = 0x3ff;
|
| +
|
| + // The internal buffer starts as Uint8List, but may change to Uint16List
|
| + // if the string contains non-Latin-1 characters.
|
| + List<int> buffer = new Uint8List(INITIAL_CAPACITY);
|
| + // Number of elements in buffer.
|
| + int length = 0;
|
| + // Partial decoding state, for cases where an UTF-8 sequences is split
|
| + // between chunks.
|
| + int partialState = NO_PARTIAL;
|
| + // Whether all characters so far have been Latin-1 (and the buffer is
|
| + // still a Uint8List). Set to false when the first non-Latin-1 character
|
| + // is encountered, and the buffer is then also converted to a Uint16List.
|
| + bool isLatin1 = true;
|
| + // If allowing malformed, invalid UTF-8 sequences are converted to
|
| + // U+FFFD.
|
| + bool allowMalformed;
|
| +
|
| + _Utf8StringBuffer(this.allowMalformed);
|
| +
|
| + /**
|
| + * Parse the continuation of a multi-byte UTF-8 sequence.
|
| + *
|
| + * Parse [utf8] from [position] to [end]. If the sequence extends beyond
|
| + * `end`, store the partial state in [partialState], and continue from there
|
| + * on the next added slice.
|
| + *
|
| + * The [size] is the number of expected continuation bytes total,
|
| + * and [missing] is the number of remaining continuation bytes.
|
| + * The [size] is used to detect overlong encodings.
|
| + * The [value] is the value collected so far.
|
| + *
|
| + * When called after seeing the first multi-byte marker, the [size] and
|
| + * [missing] values are always the same, but they may differ if continuing
|
| + * after a partial sequence.
|
| + */
|
| + int addContinuation(List<int> utf8, int position, int end,
|
| + int size, int missing, int value) {
|
| + int codeEnd = position + missing;
|
| + do {
|
| + if (position == end) {
|
| + missing = codeEnd - position;
|
| + partialState =
|
| + size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE);
|
| + return end;
|
| + }
|
| + int char = utf8[position];
|
| + if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) {
|
| + if (allowMalformed) {
|
| + addCharCode(0xFFFD);
|
| + return position;
|
| + }
|
| + throw new FormatException("Expected UTF-8 continuation byte, "
|
| + "found $char", utf8, position);
|
| + }
|
| + value = 64 * value + (char & MASK_CONTINUE_VALUE);
|
| + position++;
|
| + } while (position < codeEnd);
|
| + if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) {
|
| + // Over-long encoding.
|
| + if (allowMalformed) {
|
| + value = 0xFFFD;
|
| + } else {
|
| + throw new FormatException(
|
| + "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}"
|
| + " encoded in ${size + 1} bytes.", utf8, position - 1);
|
| + }
|
| + }
|
| + addCharCode(value);
|
| + return position;
|
| + }
|
| +
|
| + void addCharCode(int char) {
|
| + assert(char >= 0);
|
| + assert(char <= MAX_UNICODE);
|
| + if (partialState != NO_PARTIAL) {
|
| + if (allowMalformed) {
|
| + partialState = NO_PARTIAL;
|
| + addCharCode(0xFFFD);
|
| + } else {
|
| + throw new FormatException("Incomplete UTF-8 sequence", utf8);
|
| + }
|
| + }
|
| + if (isLatin1 && char > 0xff) {
|
| + _to16Bit(); // Also grows a little if close to full.
|
| + }
|
| + int length = this.length;
|
| + if (char <= MAX_THREE_BYTE) {
|
| + if (length == buffer.length) _grow();
|
| + buffer[length] = char;
|
| + this.length = length + 1;
|
| + return;
|
| + }
|
| + if (length + 2 > buffer.length) _grow();
|
| + int bits = char - 0x10000;
|
| + buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE);
|
| + buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE);
|
| + this.length = length + 2;
|
| + }
|
| +
|
| + void _to16Bit() {
|
| + assert(isLatin1);
|
| + Uint16List newBuffer;
|
| + if ((length + INITIAL_CAPACITY) * 2 <= buffer.length) {
|
| + // Reuse existing buffer if it's big enough.
|
| + newBuffer = new Uint16List.view(buffer.buffer);
|
| + } else {
|
| + int newCapacity = buffer.length;
|
| + if (newCapacity - length < INITIAL_CAPACITY) {
|
| + newCapacity = length + INITIAL_CAPACITY;
|
| + }
|
| + newBuffer = new Uint16List(newCapacity);
|
| + }
|
| + newBuffer.setRange(0, length, buffer);
|
| + buffer = newBuffer;
|
| + isLatin1 = false;
|
| + }
|
| +
|
| + void _grow() {
|
| + int newCapacity = buffer.length * 2;
|
| + List newBuffer;
|
| + if (isLatin1) {
|
| + newBuffer = new Uint8List(newCapacity);
|
| + } else {
|
| + newBuffer = new Uint16List(newCapacity);
|
| + }
|
| + newBuffer.setRange(0, length, buffer);
|
| + buffer = newBuffer;
|
| + }
|
| +
|
| + void addSlice(List<int> utf8, int position, int end) {
|
| + assert(position < end);
|
| + if (partialState > 0) {
|
| + int continueByteCount = (partialState & MASK_TWO_BIT);
|
| + int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
|
| + int value = partialState >> SHIFT_VALUE;
|
| + partialState = NO_PARTIAL;
|
| + position = addContinuation(utf8, position, end,
|
| + continueByteCount, missing, value);
|
| + if (position == end) return;
|
| + }
|
| + // Keep index and capacity in local variables while looping over
|
| + // ASCII characters.
|
| + int index = length;
|
| + int capacity = buffer.length;
|
| + while (position < end) {
|
| + int char = utf8[position];
|
| + if (char <= MAX_ASCII) {
|
| + if (index == capacity) {
|
| + length = index;
|
| + _grow();
|
| + capacity = buffer.length;
|
| + }
|
| + buffer[index++] = char;
|
| + position++;
|
| + continue;
|
| + }
|
| + length = index;
|
| + if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) {
|
| + if (allowMalformed) {
|
| + addCharCode(0xFFFD);
|
| + position++;
|
| + } else {
|
| + throw new FormatException("Unexepected UTF-8 continuation byte",
|
| + utf8, position);
|
| + }
|
| + } else if (char < 0xE0) { // C0-DF
|
| + // Two-byte.
|
| + position = addContinuation(utf8, position + 1, end, 1, 1,
|
| + char & MASK_TWO_BYTE);
|
| + } else if (char < 0xF0) { // E0-EF
|
| + // Three-byte.
|
| + position = addContinuation(utf8, position + 1, end, 2, 2,
|
| + char & MASK_THREE_BYTE);
|
| + } else if (char < 0xF8) { // F0-F7
|
| + // Four-byte.
|
| + position = addContinuation(utf8, position + 1, end, 3, 3,
|
| + char & MASK_FOUR_BYTE);
|
| + } else {
|
| + if (allowMalformed) {
|
| + addCharCode(0xFFFD);
|
| + position++;
|
| + } else {
|
| + throw new FormatException("Invalid UTF-8 byte: $char",
|
| + utf8, position);
|
| + }
|
| + }
|
| + index = length;
|
| + capacity = buffer.length;
|
| + }
|
| + length = index;
|
| + }
|
| +
|
| + String toString() {
|
| + if (partialState != NO_PARTIAL) {
|
| + if (allowMalformed) {
|
| + partialState = NO_PARTIAL;
|
| + addCharCode(0xFFFD);
|
| + } else {
|
| + int continueByteCount = (partialState & MASK_TWO_BIT);
|
| + int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
|
| + int value = partialState >> SHIFT_VALUE;
|
| + int seenByteCount = continueByteCount - missing + 1;
|
| + List source = new Uint8List(seenByteCount);
|
| + while (seenByteCount > 1) {
|
| + seenByteCount--;
|
| + source[seenByteCount] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE);
|
| + value >>= 6;
|
| + }
|
| + source[0] = value | (0x3c0 >> (continueByteCount - 1));
|
| + throw new FormatException("Incomplete UTF-8 sequence",
|
| + source, source.length);
|
| + }
|
| + }
|
| + return new String.fromCharCodes(buffer, 0, length);
|
| + }
|
| +}
|
| +
|
| +/**
|
| + * Chunked JSON parser that parses UTF-8 chunks.
|
| + */
|
| +class _JsonUtf8Parser extends _ChunkedJsonParser {
|
| + final bool allowMalformed;
|
| + List<int> chunk;
|
| + int chunkEnd;
|
| +
|
| + _JsonUtf8Parser(_JsonListener listener, this.allowMalformed)
|
| + : super(listener);
|
| +
|
| + int getChar(int position) => chunk[position];
|
| +
|
| + String getString(int start, int end) {
|
| + beginString();
|
| + addSliceToString(start, end);
|
| + String result = endString();
|
| + return result;
|
| + }
|
| +
|
| + void beginString() {
|
| + this.buffer = new _Utf8StringBuffer(allowMalformed);
|
| + }
|
| +
|
| + void addSliceToString(int start, int end) {
|
| + _Utf8StringBuffer buffer = this.buffer;
|
| + buffer.addSlice(chunk, start, end);
|
| + }
|
| +
|
| + void addCharToString(int charCode) {
|
| + _Utf8StringBuffer buffer = this.buffer;
|
| + buffer.addCharCode(charCode);
|
| + }
|
| +
|
| + String endString() {
|
| + _Utf8StringBuffer buffer = this.buffer;
|
| + this.buffer = null;
|
| + return buffer.toString();
|
| + }
|
| +
|
| + void copyCharsToList(int start, int end, List target, int offset) {
|
| + int length = end - start;
|
| + target.setRange(offset, offset + length, chunk, start);
|
| + }
|
| +
|
| + double parseDouble(int start, int end) {
|
| + String string = getString(start, end);
|
| + reutrn _parseDouble(string, 0, string.length);
|
| + }
|
| +}
|
| +
|
| +double _parseDouble(String source, int start, int end)
|
| + native "Double_parse";
|
| +
|
| +/**
|
| + * Implements the chunked conversion from a UTF-8 encoding of JSON
|
| + * to its corresponding object.
|
| + */
|
| +class _JsonUtf8DecoderSink extends ByteConversionSinkBase {
|
| + _ChunkedUtf8Parser _parser;
|
| + final Sink<Object> _sink;
|
| +
|
| + _JsonUtf8DecoderSink(reviver, this._sink, bool allowMalformed)
|
| + : _parser = _createParser(reviver, allowMalformed);
|
| +
|
| + static _ChunkedJsonParser _createParser(reviver, bool allowMalformed) {
|
| + _BuildJsonListener listener;
|
| + if (reviver == null) {
|
| + listener = new _BuildJsonListener();
|
| + } else {
|
| + listener = new _ReviverJsonListener(reviver);
|
| + }
|
| + return new _JsonUtf8Parser(listener, allowMalformed);
|
| + }
|
| +
|
| + void addSlice(List<int> chunk, int start, int end, bool isLast) {
|
| + _addChunk(chunk, start, end);
|
| + if (isLast) close();
|
| + }
|
| +
|
| + void add(List<int> chunk) {
|
| + _addChunk(chunk, 0, chunk.length);
|
| + }
|
| +
|
| + void _addChunk(List<int> chunk, int start, int end) {
|
| + _parser.chunk = chunk;
|
| + _parser.chunkEnd = end;
|
| + _parser.parse(start);
|
| + }
|
|
|
| - void fail(int position, [String message]) {
|
| - if (message == null) message = "Unexpected character";
|
| - throw new FormatException(message, source, position);
|
| + void close() {
|
| + _parser.close();
|
| + var decoded = _parser.result;
|
| + _sink.add(decoded);
|
| + _sink.close();
|
| }
|
| }
|
|
|