Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(229)

Unified Diff: runtime/lib/convert_patch.dart

Issue 649113005: Make JSON parsing work as a chunked conversion sink. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Address comments. Fix bug. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | runtime/lib/double_patch.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/lib/convert_patch.dart
diff --git a/runtime/lib/convert_patch.dart b/runtime/lib/convert_patch.dart
index 2a4ab1bc8af658b484fc94e8726a2c2afc8f8627..64795b8b4b8c4078d2fd1748908cb372da7184c4 100644
--- a/runtime/lib/convert_patch.dart
+++ b/runtime/lib/convert_patch.dart
@@ -1,7 +1,9 @@
-// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
+import "dart:_internal" show POWERS_OF_TEN;
+
// JSON conversion.
patch _parseJson(String json, reviver(var key, var value)) {
@@ -11,7 +13,11 @@ patch _parseJson(String json, reviver(var key, var value)) {
} else {
listener = new _ReviverJsonListener(reviver);
}
- new _JsonParser(json, listener).parse();
+ var parser = new _JsonStringParser(listener);
+ parser.chunk = json;
+ parser.chunkEnd = json.length;
+ parser.parse(0);
+ parser.close();
return listener.result;
}
@@ -19,6 +25,9 @@ patch _parseJson(String json, reviver(var key, var value)) {
// Simple API for JSON parsing.
+/**
+ * Listener for parsing events from [_ChunkedJsonParser].
+ */
abstract class _JsonListener {
void handleString(String value) {}
void handleNumber(num value) {}
@@ -34,7 +43,7 @@ abstract class _JsonListener {
}
/**
- * A [JsonListener] that builds data objects from the parser events.
+ * A [_JsonListener] that builds data objects from the parser events.
*
* This is a simple stack-based object builder. It keeps the most recently
* seen value in a variable, and uses it depending on the following event.
@@ -135,7 +144,72 @@ class _ReviverJsonListener extends _BuildJsonListener {
}
}
-class _JsonParser {
+/**
+ * Buffer holding parts of a numeral.
+ *
+ * The buffer contains the characters of a JSON number.
+ * These are all ASCII, so an [Uint8List] is used as backing store.
+ *
+ * This buffer is used when a JSON number is split between separate chunks.
+ *
+ */
+class _NumberBuffer {
+  // Smallest backing store allocated for a new buffer.
+  static const int minCapacity = 16;
+  // Extra room reserved beyond the characters already known, as a guess at
+  // how many more characters a following chunk will contribute.
+  static const int kDefaultOverhead = 5;
+  // Backing store. Only the first [length] bytes hold numeral characters.
+  Uint8List list;
+  // Number of bytes of [list] currently in use; maintained by the users of
+  // the buffer after they copy characters into [list].
+  int length = 0;
+  _NumberBuffer(int initialCapacity)
+      : list = new Uint8List(_initialCapacity(initialCapacity));
+
+  int get capacity => list.length;
+
+  // Pick an initial capacity greater than the first part's size.
+  // The typical use case has two parts, this is the attempt at
+  // guessing the size of the second part without overdoing it.
+  // The default estimate of the second part is [kDefaultOverhead],
+  // then round up to a multiple of four, and return the result,
+  // or [minCapacity] if that is greater.
+  static int _initialCapacity(int minCapacity) {
+    // The parameter shadows the class constant of the same name, so the
+    // constant must be qualified. An unqualified comparison would compare
+    // the parameter with itself and never apply the lower bound.
+    int capacity = minCapacity + kDefaultOverhead;
+    if (capacity < _NumberBuffer.minCapacity) {
+      return _NumberBuffer.minCapacity;
+    }
+    return (capacity + 3) & ~3; // Round up to a multiple of four.
+  }
+
+  // Grows to the exact size asked for. Never shrinks; the existing contents
+  // are copied to the start of the new backing store.
+  void ensureCapacity(int newCapacity) {
+    Uint8List list = this.list;
+    if (newCapacity <= list.length) return;
+    Uint8List newList = new Uint8List(newCapacity);
+    newList.setRange(0, list.length, list, 0);
+    this.list = newList;
+  }
+
+  // Returns the first [length] bytes of the buffer as a string.
+  String getString() {
+    var list = this.list;
+    if (length < list.length) {
+      // Only part of the backing store is in use; expose just that prefix.
+      list = new Uint8List.view(list.buffer, 0, length);
+    }
+    return new String.fromCharCodes(list);
+  }
+
+  // TODO(lrn): See if parsing of numbers can be abstracted to something
+  // not only working on strings, but also on char-code lists, without losing
+  // performance.
+  int parseInt() => int.parse(getString());
+  double parseDouble() => double.parse(getString());
+}
+
+/**
+ * Chunked JSON parser.
+ *
+ * Receives inputs in chunks, gives access to individual parts of the input,
+ * and stores input state between chunks.
+ *
+ * Implementations include [String] and UTF-8 parsers.
+ */
+abstract class _ChunkedJsonParser {
// A simple non-recursive state-based parser for JSON.
//
// Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON
@@ -172,11 +246,11 @@ class _JsonParser {
static const int NO_VALUES = 12;
// Objects and arrays are "empty" until their first property/element.
+ // At this position, they may either have an entry or a close-bracket.
static const int EMPTY = 0;
static const int NON_EMPTY = 16;
static const int EMPTY_MASK = 16; // Empty if zero.
-
static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY;
// Actual states.
@@ -226,18 +300,466 @@ class _JsonParser {
static const int LBRACE = 0x7b;
static const int RBRACE = 0x7d;
- final String source;
+ // State of partial value at chunk split.
+ static const int NO_PARTIAL = 0;
+ static const int PARTIAL_STRING = 1;
+ static const int PARTIAL_NUMERAL = 2;
+ static const int PARTIAL_KEYWORD = 3;
+ static const int MASK_PARTIAL = 3;
+
+ // Partial states for numerals. Values can be |'ed with PARTIAL_NUMERAL.
+ static const int NUM_SIGN = 0; // After initial '-'.
+ static const int NUM_ZERO = 4; // After '0' as first digit.
+ static const int NUM_DIGIT = 8; // After digit, no '.' or 'e' seen.
+ static const int NUM_DOT = 12; // After '.'.
+ static const int NUM_DOT_DIGIT = 16; // After a decimal digit (after '.').
+ static const int NUM_E = 20; // After 'e' or 'E'.
+ static const int NUM_E_SIGN = 24; // After '-' or '+' after 'e' or 'E'.
+ static const int NUM_E_DIGIT = 28; // After exponent digit.
+ static const int NUM_SUCCESS = 32; // Never stored as partial state.
+
+ // Partial states for strings.
+ static const int STR_PLAIN = 0; // Inside string, but not escape.
+ static const int STR_ESCAPE = 4; // After '\'.
+ static const int STR_U = 16; // After '\u' and 0-3 hex digits.
+ static const int STR_U_COUNT_SHIFT = 2; // Hex digit count in bits 2-3.
+ static const int STR_U_VALUE_SHIFT = 5; // Hex digit value in bits 5+.
+
+ // Partial states for keywords.
+ static const int KWD_TYPE_MASK = 12;
+ static const int KWD_TYPE_SHIFT = 2;
+ static const int KWD_NULL = 0; // Prefix of "null" seen.
+ static const int KWD_TRUE = 4; // Prefix of "true" seen.
+ static const int KWD_FALSE = 8; // Prefix of "false" seen.
+ static const int KWD_COUNT_SHIFT = 4; // Prefix length in bits 4+.
+
+ // Mask used to mask off two lower bits.
+ static const int TWO_BIT_MASK = 3;
+
final _JsonListener listener;
- _JsonParser(this.source, this.listener);
-
- /** Parses [source], or throws if it fails. */
- void parse() {
- final List<int> states = <int>[];
- int state = STATE_INITIAL;
- int position = 0;
- int length = source.length;
+
+ // The current parsing state.
+ int state = STATE_INITIAL;
+ List<int> states = <int>[];
+
+ /**
+ * Stores tokenizer state between chunks.
+ *
+ * This state is stored when a chunk stops in the middle of a
+ * token (string, numeral, boolean or null).
+ *
+ * The partial state is used to continue parsing on the next chunk.
+ * The previous chunk is not retained, any data needed are stored in
+ * this integer, or in the [buffer] field as a string-building buffer
+ * or a [_NumberBuffer].
+ *
+ * The partial state is stored in [partialState] as bits.
+ *
+ * ..00 : No partial value (NO_PARTIAL).
+ *
+ * ..00001 : Partial string, not inside escape.
+ * ..00101 : Partial string, after '\'.
+ * ..vvvv1dd01 : Partial \u escape.
+ * The 'dd' bits (2-3) encode the number of hex digits seen.
+ * Bits 5-16 encode the value of the hex digits seen so far.
+ *
+ * ..0ddd10 : Partial numeral.
+ * The `ddd` bits store the parts of the numeral seen so
+ * far, as the constants `NUM_*` defined above.
+ * The characters of the numeral are stored in [buffer]
+ * as a [_NumberBuffer].
+ *
+ * ..0ddd0011 : Partial 'null' keyword.
+ * ..0ddd0111 : Partial 'true' keyword.
+ * ..0ddd1011 : Partial 'false' keyword.
+ * For all three keywords, the `ddd` bits encode the number
+ * of letters seen.
+ */
+ int partialState = NO_PARTIAL;
+
+ /**
+ * Extra data stored while parsing a primitive value.
+ * May be set during parsing, always set at chunk end if a value is partial.
+ *
+ * May contain a string buffer while parsing strings.
+ */
+ var buffer = null;
+
+ _ChunkedJsonParser(this.listener);
+
+ /**
+ * Push the current parse [state] on a stack.
+ *
+ * State is pushed when a new array or object literal starts,
+ * so the parser can go back to the correct value when the literal ends.
+ */
+ void saveState(int state) => states.add(state); // Push onto [states].
+
+ /**
+ * Restore a state pushed with [saveState].
+ */
+ int restoreState() => states.removeLast(); // Throws if nothing was saved.
+
+ /**
+ * Finalizes the parsing.
+ *
+ * Throws if the source read so far doesn't end up with a complete
+ * parsed value. That means it must not be inside a list or object
+ * literal, and any partial value read should also be a valid complete
+ * value.
+ *
+ * The only valid partial state is a number that ends in a digit, and
+ * only if the number is the entire JSON value being parsed
+ * (otherwise it would be inside a list or object).
+ * Such a number will be completed. Any other partial state is an error.
+ */
+ void close() {
+   int pending = partialState;
+   if (pending != NO_PARTIAL) {
+     switch (pending & MASK_PARTIAL) {
+       case PARTIAL_NUMERAL:
+         // A numeral cut off by the end of the input may still be a
+         // complete number. Hand the buffered characters over with an
+         // empty slice (0, 0) and let finishChunkNumber accept or reject
+         // them based on the numeral state.
+         // This costs a little extra when the whole input is a single
+         // number, which is assumed to be rare.
+         _NumberBuffer pendingNumber = this.buffer;
+         this.buffer = null;
+         finishChunkNumber(pending & ~MASK_PARTIAL, 0, 0, pendingNumber);
+         break;
+       case PARTIAL_STRING:
+         fail(chunkEnd, "Unterminated string");
+         break;
+       default:
+         assert((pending & MASK_PARTIAL) == PARTIAL_KEYWORD);
+         fail(chunkEnd); // Incomplete literal.
+         break;
+     }
+   }
+   // The structural state must have reached the end of a complete value.
+   if (state != STATE_END) fail(chunkEnd);
+ }
+
+ /**
+ * Read out the result after successfully closing the parser.
+ *
+ * The parser is closed by calling [close] or calling [addSourceChunk] with
+ * `true` as second (`isLast`) argument.
+ */
+ Object get result => listener.result; // Forwarded from the listener.
+
+ /** Sets the current source chunk. */
+ void set chunk(var source);
+
+ /**
+ * Length of current chunk.
+ *
+ * The valid arguments to [getChar] are 0 .. `chunkEnd - 1`.
+ */
+ int get chunkEnd;
+
+ /**
+ * Returns the chunk itself.
+ *
+ * Only used by [fail] to include the chunk in the thrown [FormatException].
+ */
+ get chunk;
+
+ /**
+ * Get character/code unit of current chunk.
+ *
+ * The [index] must be non-negative and less than `chunkEnd`.
+ * In practice, [index] will be no smaller than the `start` argument passed
+ * to [parse].
+ */
+ int getChar(int index);
+
+ /**
+ * Copy ASCII characters from start to end of chunk into a list.
+ *
+ * Used for number buffer (always copies ASCII, so encoding is not important).
+ */
+ void copyCharsToList(int start, int end, List<int> target);
+
+ /**
+ * Build a string using input code units.
+ *
+ * Creates a string buffer and enables adding characters and slices
+ * to that buffer.
+ * The buffer is stored in the [buffer] field. If the string is unterminated,
+ * the same buffer is used to continue parsing in the next chunk.
+ */
+ void beginString();
+ /**
+ * Add single character code to string being built.
+ *
+ * Used for unparsed escape sequences.
+ */
+ void addCharToString(int charCode);
+
+ /**
+ * Adds slice of current chunk to string being built.
+ *
+ * The [start] position is inclusive, [end] is exclusive.
+ */
+ void addSliceToString(int start, int end);
+
+ /** Finalizes the string being built and returns it as a String. */
+ String endString();
+
+ /**
+ * Extracts a literal string from a slice of the current chunk.
+ *
+ * No interpretation of the content is performed, except for converting
+ * the source format to string.
+ * This can be implemented more or less efficiently depending on the
+ * underlying source.
+ *
+ * This is used for string literals that contain no escapes.
+ */
+ String getString(int start, int end);
+
+ /**
+ * Parse a slice of the current chunk as an integer.
+ *
+ * The format is expected to be correct.
+ */
+ int parseInt(int start, int end) => int.parse(getString(start, end));
+
+ /**
+ * Parse a slice of the current chunk as a double.
+ *
+ * The format is expected to be correct.
+ * This is used by [parseNumber] when the double value cannot be
+ * built exactly during parsing.
+ */
+ double parseDouble(int start, int end) =>
+     double.parse(getString(start, end));
+
+ /**
+ * Create a _NumberBuffer containing the digits from [start] to [chunkEnd].
+ *
+ * This creates a number buffer and initializes it with the part of the
+ * number literal ending the current chunk
+ */
+ _NumberBuffer createNumberBuffer(int start) {
+   // The return type was declared `void` although the function builds and
+   // returns a buffer; it now declares what it actually returns.
+   assert(start >= 0);
+   assert(start < chunkEnd);
+   int length = chunkEnd - start;
+   var buffer = new _NumberBuffer(length);
+   // NOTE(review): other callers in this class pass a fourth (target
+   // offset) argument to copyCharsToList while its declaration here takes
+   // three parameters -- confirm which signature the implementations use.
+   copyCharsToList(start, chunkEnd, buffer.list);
+   // Record how many bytes of the backing store are in use.
+   buffer.length = length;
+   return buffer;
+ }
+
+ /**
+ * Continues parsing a partial value.
+ */
+ int parsePartial(int position) {
+   // Nothing to consume: leave the stored partial state untouched.
+   if (position == chunkEnd) return position;
+   int saved = this.partialState;
+   assert(saved != NO_PARTIAL);
+   // Clear the stored state before dispatching; the handlers store a new
+   // partial state themselves if the value is still incomplete.
+   this.partialState = NO_PARTIAL;
+   int kind = saved & MASK_PARTIAL;
+   int detail = saved & ~MASK_PARTIAL;
+   assert(kind != 0);
+   switch (kind) {
+     case PARTIAL_STRING:
+       return parsePartialString(position, detail);
+     case PARTIAL_NUMERAL:
+       return parsePartialNumber(position, detail);
+     case PARTIAL_KEYWORD:
+       return parsePartialKeyword(position, detail);
+   }
+   return position;
+ }
+
+ /**
+ * Parses the remainder of a number into the number buffer.
+ *
+ * Syntax is checked while parsing.
+ * Starts at position, which is expected to be the start of the chunk,
+ * and returns the index of the first non-number-literal character found,
+ * or chunkEnd if the entire chunk is a valid number continuation.
+ * Throws if a syntax error is detected.
+ */
+ int parsePartialNumber(int position, int state) {
+ // Resumes a numeral that was cut off by the end of the previous chunk.
+ // [state] is one of the NUM_* constants describing what had been seen.
+ // The characters seen so far are in the _NumberBuffer held by [buffer].
+ int start = position;
+ // Primitive implementation, can be optimized.
+ _NumberBuffer buffer = this.buffer;
+ this.buffer = null;
+ int end = chunkEnd;
+ // Breaking out of this labeled block means the chunk ended in the middle
+ // of the numeral; control then continues at the bailout code after it.
+ toBailout: {
+ if (position == end) break toBailout;
+ int char = getChar(position);
+ // `digit <= 9` is used as the "is a decimal digit" test throughout
+ // (presumably CHAR_0 is the code unit of '0' -- defined outside this view).
+ int digit = char ^ CHAR_0;
+ if (state == NUM_SIGN) {
+ // Only a '-' has been seen so far: a digit must follow.
+ if (digit <= 9) {
+ if (digit == 0) {
+ state = NUM_ZERO;
+ } else {
+ state = NUM_DIGIT;
+ }
+ position++;
+ if (position == end) break toBailout;
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ } else {
+ return fail(position);
+ }
+ }
+ if (state == NUM_ZERO) {
+ // JSON does not allow insignificant leading zeros (e.g., "09").
+ if (digit <= 9) return fail(position);
+ // Continue as NUM_DIGIT so the loop below handles '.', 'e' or the end.
+ state = NUM_DIGIT;
+ }
+ // Integer part: consume digits until '.', 'e'/'E' or a non-number char.
+ while (state == NUM_DIGIT) {
+ if (digit > 9) {
+ if (char == DECIMALPOINT) {
+ state = NUM_DOT;
+ } else if ((char | 0x20) == CHAR_e) {
+ state = NUM_E;
+ } else {
+ finishChunkNumber(state, start, position, buffer);
+ return position;
+ }
+ }
+ position++;
+ if (position == end) break toBailout;
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ }
+ if (state == NUM_DOT) {
+ // At least one digit is required after the decimal point.
+ if (digit > 9) return fail(position);
+ state = NUM_DOT_DIGIT;
+ }
+ // Fraction part: consume digits until 'e'/'E' or a non-number char.
+ while (state == NUM_DOT_DIGIT) {
+ if (digit > 9) {
+ if ((char | 0x20) == CHAR_e) {
+ state = NUM_E;
+ } else {
+ finishChunkNumber(state, start, position, buffer);
+ return position;
+ }
+ }
+ position++;
+ if (position == end) break toBailout;
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ }
+ if (state == NUM_E) {
+ // Optional sign directly after the exponent marker.
+ if (char == PLUS || char == MINUS) {
+ state = NUM_E_SIGN;
+ position++;
+ if (position == end) break toBailout;
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ }
+ }
+ assert(state >= NUM_E);
+ // Exponent digits. If none are found the state stays NUM_E/NUM_E_SIGN,
+ // which finishChunkNumber reports as an unterminated number literal.
+ while (digit <= 9) {
+ state = NUM_E_DIGIT;
+ position++;
+ if (position == end) break toBailout;
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ }
+ finishChunkNumber(state, start, position, buffer);
+ return position;
+ }
+ // Bailout code in case the current chunk ends while parsing the numeral.
+ // The characters from [start] to the chunk end are appended to the buffer
+ // and the numeral state is stored again as the partial state.
+ assert(position == end);
+ continueChunkNumber(state, start, buffer);
+ return chunkEnd;
+ }
+
+ /**
+ * Continues parsing a partial string literal.
+ *
+ * Handles partial escapes and then hands the parsing off to
+ * [parseStringToBuffer].
+ */
+ int parsePartialString(int position, int partialState) {
+   if (partialState == STR_PLAIN) {
+     // Cut off between ordinary characters: just keep scanning.
+     return parseStringToBuffer(position);
+   } else if (partialState == STR_ESCAPE) {
+     // Cut off right after a backslash: finish the escape first.
+     // parseStringEscape stores a partial state itself if it hits the end.
+     int afterEscape = parseStringEscape(position);
+     return afterEscape == chunkEnd
+         ? afterEscape
+         : parseStringToBuffer(afterEscape);
+   }
+   // Cut off inside a \uXXXX escape: recover the digits seen so far.
+   assert((partialState & STR_U) != 0);
+   int codeUnit = partialState >> STR_U_VALUE_SHIFT;
+   int digitsSeen = (partialState >> STR_U_COUNT_SHIFT) & TWO_BIT_MASK;
+   while (digitsSeen < 4) {
+     if (position == chunkEnd) {
+       return chunkStringEscapeU(digitsSeen, codeUnit);
+     }
+     int hexValue = parseHexDigit(getChar(position));
+     if (hexValue < 0) fail(position, "Invalid hex digit");
+     codeUnit = 16 * codeUnit + hexValue;
+     digitsSeen++;
+     position++;
+   }
+   addCharToString(codeUnit);
+   return parseStringToBuffer(position);
+ }
+
+ /**
+ * Continues parsing a partial keyword.
+ */
+ int parsePartialKeyword(int position, int partialState) {
+   // Unpack which keyword is being read and how many letters were matched.
+   int keywordType = partialState & KWD_TYPE_MASK;
+   int matched = partialState >> KWD_COUNT_SHIFT;
+   String keyword =
+       const ["null", "true", "false"][keywordType >> KWD_TYPE_SHIFT];
+   assert(matched < keyword.length);
+   while (true) {
+     if (position == chunkEnd) {
+       // Still incomplete: store the progress for the next chunk.
+       this.partialState =
+           PARTIAL_KEYWORD | keywordType | (matched << KWD_COUNT_SHIFT);
+       return chunkEnd;
+     }
+     int expected = keyword.codeUnitAt(matched);
+     if (getChar(position) != expected) return fail(position);
+     position++;
+     matched++;
+     if (matched >= keyword.length) break;
+   }
+   // The whole keyword has been matched; report its value.
+   if (keywordType == KWD_NULL) {
+     listener.handleNull();
+   } else {
+     listener.handleBool(keywordType == KWD_TRUE);
+   }
+   return position;
+ }
+
+ /** Convert hex-digit to its value. Returns -1 if char is not a hex digit. */
+ int parseHexDigit(int char) {
+   // '0'..'9' are 0x30..0x39, so XOR with 0x30 yields 0..9 for them and a
+   // larger value for any other code unit.
+   int decimal = char ^ 0x30;
+   if (decimal <= 9) return decimal;
+   // Setting bit 0x20 folds upper case onto lower case; 'a'..'f' are
+   // 0x61..0x66, so XOR with 0x60 yields 1..6 for them.
+   int alpha = (char | 0x20) ^ 0x60;
+   return (alpha >= 1 && alpha <= 6) ? alpha + 9 : -1;
+ }
+
+ /**
+ * Parses the current chunk as a chunk of JSON.
+ *
+ * Starts parsing at [position] and continues until [chunkEnd].
+ * Continues parsing where the previous chunk (if any) ended.
+ */
+ void parse(int position) {
+ int length = chunkEnd;
+ if (partialState != NO_PARTIAL) {
+ position = parsePartial(position);
+ if (position == length) return;
+ }
+ int state = this.state;
while (position < length) {
- int char = source.codeUnitAt(position);
+ int char = getChar(position);
switch (char) {
case SPACE:
case CARRIAGE_RETURN:
@@ -246,41 +768,41 @@ class _JsonParser {
position++;
break;
case QUOTE:
- if ((state & ALLOW_STRING_MASK) != 0) fail(position);
- position = parseString(position + 1);
+ if ((state & ALLOW_STRING_MASK) != 0) return fail(position);
state |= VALUE_READ_BITS;
+ position = parseString(position + 1);
break;
case LBRACKET:
- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
listener.beginArray();
- states.add(state);
+ saveState(state);
state = STATE_ARRAY_EMPTY;
position++;
break;
case LBRACE:
- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
listener.beginObject();
- states.add(state);
+ saveState(state);
state = STATE_OBJECT_EMPTY;
position++;
break;
case CHAR_n:
- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
- position = parseNull(position);
+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
state |= VALUE_READ_BITS;
+ position = parseNull(position);
break;
case CHAR_f:
- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
- position = parseFalse(position);
+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
state |= VALUE_READ_BITS;
+ position = parseFalse(position);
break;
case CHAR_t:
- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
- position = parseTrue(position);
+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
state |= VALUE_READ_BITS;
+ position = parseTrue(position);
break;
case COLON:
- if (state != STATE_OBJECT_KEY) fail(position);
+ if (state != STATE_OBJECT_KEY) return fail(position);
listener.propertyName();
state = STATE_OBJECT_COLON;
position++;
@@ -295,7 +817,7 @@ class _JsonParser {
state = STATE_ARRAY_COMMA;
position++;
} else {
- fail(position);
+ return fail(position);
}
break;
case RBRACKET:
@@ -305,9 +827,9 @@ class _JsonParser {
listener.arrayElement();
listener.endArray();
} else {
- fail(position);
+ return fail(position);
}
- state = states.removeLast() | VALUE_READ_BITS;
+ state = restoreState() | VALUE_READ_BITS;
position++;
break;
case RBRACE:
@@ -317,19 +839,19 @@ class _JsonParser {
listener.propertyValue();
listener.endObject();
} else {
- fail(position);
+ return fail(position);
}
- state = states.removeLast() | VALUE_READ_BITS;
+ state = restoreState() | VALUE_READ_BITS;
position++;
break;
default:
if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
- position = parseNumber(char, position);
state |= VALUE_READ_BITS;
+ position = parseNumber(char, position);
break;
}
}
- if (state != STATE_END) fail(position);
+ this.state = state;
}
/**
@@ -338,12 +860,14 @@ class _JsonParser {
* [:source[position]:] must be "t".
*/
int parseTrue(int position) {
- assert(source.codeUnitAt(position) == CHAR_t);
- if (source.length < position + 4) fail(position, "Unexpected identifier");
- if (source.codeUnitAt(position + 1) != CHAR_r ||
- source.codeUnitAt(position + 2) != CHAR_u ||
- source.codeUnitAt(position + 3) != CHAR_e) {
- fail(position);
+ assert(getChar(position) == CHAR_t);
+ if (chunkEnd < position + 4) {
+ return parseKeywordPrefix(position, "true", KWD_TRUE);
+ }
+ if (getChar(position + 1) != CHAR_r ||
+ getChar(position + 2) != CHAR_u ||
+ getChar(position + 3) != CHAR_e) {
+ return fail(position);
}
listener.handleBool(true);
return position + 4;
@@ -355,13 +879,15 @@ class _JsonParser {
* [:source[position]:] must be "f".
*/
int parseFalse(int position) {
- assert(source.codeUnitAt(position) == CHAR_f);
- if (source.length < position + 5) fail(position, "Unexpected identifier");
- if (source.codeUnitAt(position + 1) != CHAR_a ||
- source.codeUnitAt(position + 2) != CHAR_l ||
- source.codeUnitAt(position + 3) != CHAR_s ||
- source.codeUnitAt(position + 4) != CHAR_e) {
- fail(position);
+ assert(getChar(position) == CHAR_f);
+ if (chunkEnd < position + 5) {
+ return parseKeywordPrefix(position, "false", KWD_FALSE);
+ }
+ if (getChar(position + 1) != CHAR_a ||
+ getChar(position + 2) != CHAR_l ||
+ getChar(position + 3) != CHAR_s ||
+ getChar(position + 4) != CHAR_e) {
+ return fail(position);
}
listener.handleBool(false);
return position + 5;
@@ -373,17 +899,33 @@ class _JsonParser {
* [:source[position]:] must be "n".
*/
int parseNull(int position) {
- assert(source.codeUnitAt(position) == CHAR_n);
- if (source.length < position + 4) fail(position, "Unexpected identifier");
- if (source.codeUnitAt(position + 1) != CHAR_u ||
- source.codeUnitAt(position + 2) != CHAR_l ||
- source.codeUnitAt(position + 3) != CHAR_l) {
- fail(position);
+ assert(getChar(position) == CHAR_n);
+ // Fewer than four characters remain in this chunk: match what is there
+ // and store the rest as a partial keyword for the next chunk.
+ if (chunkEnd < position + 4) {
+ return parseKeywordPrefix(position, "null", KWD_NULL);
+ }
+ // The whole keyword lies inside the chunk: compare the remaining letters.
+ if (getChar(position + 1) != CHAR_u ||
+ getChar(position + 2) != CHAR_l ||
+ getChar(position + 3) != CHAR_l) {
+ return fail(position);
}
listener.handleNull();
return position + 4;
}
+ int parseKeywordPrefix(int position, String chars, int type) {
+   // Called when the chunk ends before the keyword [chars] can be complete.
+   // Checks the letters that are present and stores the number matched.
+   assert(getChar(position) == chars.codeUnitAt(0));
+   int limit = chunkEnd;
+   int matched = 1; // The first letter was matched by the caller.
+   for (int index = position + 1; index < limit; index++) {
+     // A mismatch is reported at the start of the keyword.
+     if (getChar(index) != chars.codeUnitAt(matched)) return fail(position);
+     matched++;
+   }
+   this.partialState =
+       PARTIAL_KEYWORD | type | (matched << KWD_COUNT_SHIFT);
+   return limit;
+ }
+
/**
* Parses a string value.
*
@@ -394,92 +936,207 @@ class _JsonParser {
// Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"'
// Initial position is right after first '"'.
int start = position;
- while (position < source.length) {
- int char = source.codeUnitAt(position++);
+ int end = chunkEnd;
+ while (position < end) {
+ int char = getChar(position++);
// BACKSLASH is larger than QUOTE and SPACE.
if (char > BACKSLASH) {
continue;
}
if (char == BACKSLASH) {
- return parseStringWithEscapes(start, position - 1);
+ beginString();
+ addSliceToString(start, position - 1);
+ return parseStringToBuffer(position - 1);
}
if (char == QUOTE) {
- listener.handleString(source.substring(start, position - 1));
+ listener.handleString(getString(start, position - 1));
return position;
}
if (char < SPACE) {
fail(position - 1, "Control character in string");
}
}
- fail(start - 1, "Unterminated string");
+ beginString();
+ addSliceToString(start, end);
+ return chunkString(STR_PLAIN);
}
- int parseStringWithEscapes(start, position) {
- // Backslash escape detected. Collect character codes for rest of string.
- int firstEscape = position;
- List<int> chars = <int>[];
- for (int i = start; i < firstEscape; i++) {
- chars.add(source.codeUnitAt(i));
- }
- position++;
+ /**
+ * Sets up a partial string state.
+ *
+ * The state is either not inside an escape, or right after a backslash.
+ * For partial strings ending inside a Unicode escape, use
+ * [chunkStringEscapeU].
+ */
+ int chunkString(int stringState) {
+   // Tag the string sub-state with the "partial string" type bits.
+   this.partialState = stringState | PARTIAL_STRING;
+   return chunkEnd;
+ }
+
+ /**
+ * Sets up a partial string state for a partially parsed Unicode escape.
+ *
+ * The partial string state includes the current [buffer] and the
+ * number of hex digits of the Unicode escape seen so far (e.g., for `"\u30`
+ * the state knows that two digits have been seen, and what their value is.
+ *
+ * Returns [chunkEnd] so it can be used as part of a return statement.
+ */
+ int chunkStringEscapeU(int count, int value) {
+   // Pack the digit count and the accumulated value next to the STR_U flag.
+   int escapeBits = STR_U |
+       (count << STR_U_COUNT_SHIFT) |
+       (value << STR_U_VALUE_SHIFT);
+   this.partialState = PARTIAL_STRING | escapeBits;
+   return chunkEnd;
+ }
+
+ /**
+ * Parses the remainder of a string literal into a buffer.
+ *
+ * The buffer is stored in [buffer] and its underlying format depends on
+ * the input chunk type. For example UTF-8 decoding happens in the
+ * buffer, not in the parser, since all significant JSON characters are ASCII.
+ *
+ * This function scans through the string literal for escapes, and copies
+ * slices of non-escape characters using [addSliceToString].
+ */
+ int parseStringToBuffer(int position) {
+ // Scans from [position] to the closing quote or the end of the chunk.
+ // Runs of plain characters are added to the string buffer as slices and
+ // escapes are handed to parseStringEscape. Returns the position after the
+ // closing quote, or chunkEnd after storing a partial string state.
+ int end = chunkEnd;
+ int start = position;
while (true) {
- if (position == source.length) {
- fail(start - 1, "Unterminated string");
+ if (position == end) {
+ // Chunk exhausted inside the string: flush the pending slice.
+ if (position > start) {
+ addSliceToString(start, position);
+ }
+ return chunkString(STR_PLAIN);
}
- int char = source.codeUnitAt(position);
- switch (char) {
- case CHAR_b: char = BACKSPACE; break;
- case CHAR_f: char = FORM_FEED; break;
- case CHAR_n: char = NEWLINE; break;
- case CHAR_r: char = CARRIAGE_RETURN; break;
- case CHAR_t: char = TAB; break;
- case SLASH:
- case BACKSLASH:
- case QUOTE:
- break;
- case CHAR_u:
- int hexStart = position - 1;
- int value = 0;
- for (int i = 0; i < 4; i++) {
- position++;
- if (position == source.length) {
- fail(start - 1, "Unterminated string");
- }
- char = source.codeUnitAt(position);
- char -= 0x30;
- if (char < 0) fail(hexStart, "Invalid unicode escape");
- if (char < 10) {
- value = value * 16 + char;
- } else {
- char = (char | 0x20) - 0x31;
- if (char < 0 || char > 5) {
- fail(hexStart, "Invalid unicode escape");
- }
- value = value * 16 + char + 10;
- }
- }
- char = value;
- break;
- default:
- if (char < SPACE) fail(position, "Control character in string");
- fail(position, "Unrecognized string escape");
+ int char = getChar(position++);
+ if (char > BACKSLASH) continue;
+ if (char < SPACE) {
+ // Control character in string. Return the result of fail, like the
+ // other error paths do, instead of a bare `return;` in an int function.
+ return fail(position - 1);
}
- do {
- chars.add(char);
- position++;
- if (position == source.length) fail(start - 1, "Unterminated string");
- char = source.codeUnitAt(position);
- if (char == QUOTE) {
- String result = new String.fromCharCodes(chars);
- listener.handleString(result);
- return position + 1;
+ if (char == QUOTE) {
+ // End of string: flush the pending slice and report the value.
+ int quotePosition = position - 1;
+ if (quotePosition > start) {
+ addSliceToString(start, quotePosition);
}
- if (char < SPACE) {
- fail(position, "Control character in string");
+ listener.handleString(endString());
+ return position;
+ }
+ if (char != BACKSLASH) {
+ continue;
+ }
+ // Handle escape.
+ if (position - 1 > start) {
+ addSliceToString(start, position - 1);
+ }
+ if (position == end) return chunkString(STR_ESCAPE);
+ position = parseStringEscape(position);
+ // parseStringEscape stores the partial state itself when it hits the end.
+ if (position == end) return position;
+ start = position;
+ }
+ return -1; // UNREACHABLE.
+ }
+
+ /**
+ * Parse a string escape.
+ *
+ * Position is right after the initial backslash.
+ * The following escape is parsed into a character code which is added to
+ * the current string buffer using [addCharToString].
+ *
+ * Returns position after the last character of the escape.
+ */
+ int parseStringEscape(int position) {
+ // Reads the character after the backslash; [position] now points past it.
+ int char = getChar(position++);
+ int length = chunkEnd;
+ switch (char) {
+ case CHAR_b: char = BACKSPACE; break;
+ case CHAR_f: char = FORM_FEED; break;
+ case CHAR_n: char = NEWLINE; break;
+ case CHAR_r: char = CARRIAGE_RETURN; break;
+ case CHAR_t: char = TAB; break;
+ case SLASH:
+ case BACKSLASH:
+ case QUOTE:
+ // These characters stand for themselves.
+ break;
+ case CHAR_u:
+ // hexStart is the index of the 'u'; it is used for error reporting.
+ int hexStart = position - 1;
+ int value = 0;
+ for (int i = 0; i < 4; i++) {
+ // Chunk ended inside the escape: store the digits seen so far.
+ if (position == length) return chunkStringEscapeU(i, value);
+ char = getChar(position++);
+ int digit = char ^ 0x30;
+ value *= 16;
+ if (digit <= 9) {
+ value += digit;
+ } else {
+ // Fold upper case onto lower case and map 'a'..'f' to 0..5.
+ digit = (char | 0x20) - CHAR_a;
+ if (digit < 0 || digit > 5) {
+ return fail(hexStart, "Invalid unicode escape");
+ }
+ value += digit + 10;
+ }
}
- } while (char != BACKSLASH);
- position++;
+ char = value;
+ break;
+ default:
+ // NOTE(review): [position] was already advanced past the offending
+ // character, so these errors report the index after it -- confirm
+ // that this is the intended error position.
+ if (char < SPACE) return fail(position, "Control character in string");
+ return fail(position, "Unrecognized string escape");
}
+ addCharToString(char);
+ // The escape ended exactly at the chunk end: still inside the string.
+ if (position == length) return chunkString(STR_PLAIN);
+ return position;
+ }
+
+ /// Sets up a partial numeral state.
+ /// Returns chunkEnd to allow easy one-line bailout tests.
+ int beginChunkNumber(int state, int start) {
+   // Copy the numeral characters seen so far (from [start] to the end of
+   // the chunk) into a fresh number buffer.
+   int chunkLimit = chunkEnd;
+   int charCount = chunkLimit - start;
+   _NumberBuffer numeral = new _NumberBuffer(charCount);
+   copyCharsToList(start, chunkLimit, numeral.list, 0);
+   numeral.length = charCount;
+   // Keep the buffer and the numeral state for the next chunk.
+   this.buffer = numeral;
+   this.partialState = state | PARTIAL_NUMERAL;
+   return chunkLimit;
+ }
+
+ void addNumberChunk(_NumberBuffer buffer, int start, int end, int overhead) {
+   // Appends the chunk slice [start, end) after the characters already in
+   // [buffer], growing it to the new length plus [overhead] if needed.
+   int used = buffer.length;
+   int total = used + (end - start);
+   buffer.ensureCapacity(total + overhead);
+   copyCharsToList(start, end, buffer.list, used);
+   buffer.length = total;
+ }
+
+ // Continues an already chunked number across an entire chunk.
+ int continueChunkNumber(int state, int start, _NumberBuffer buffer) {
+   // The numeral covers the rest of the chunk: append it all, leaving the
+   // default overhead as room for the next chunk.
+   int chunkLimit = chunkEnd;
+   addNumberChunk(buffer, start, chunkLimit, _NumberBuffer.kDefaultOverhead);
+   // Store the buffer and the numeral state again as the partial state.
+   this.buffer = buffer;
+   this.partialState = state | PARTIAL_NUMERAL;
+   return chunkLimit;
+ }
+
+ int finishChunkNumber(int state, int start, int end, _NumberBuffer buffer) {
+   // Completes a numeral that was split across chunks and reports it to the
+   // listener. [start] and [end] delimit its final characters in the current
+   // chunk; [state] is the NUM_* state reached. Returns [end] on every
+   // successful path.
+   if (state == NUM_ZERO) {
+     // Only a zero (possibly after a sign) was seen.
+     listener.handleNumber(0);
+     return end; // Was a bare `return;` in an int-returning function.
+   }
+   if (end > start) {
+     // No more chunks belong to this numeral, so reserve no extra room.
+     addNumberChunk(buffer, start, end, 0);
+   }
+   if (state == NUM_DIGIT) {
+     listener.handleNumber(buffer.parseInt());
+   } else if (state == NUM_DOT_DIGIT || state == NUM_E_DIGIT) {
+     listener.handleNumber(buffer.parseDouble());
+   } else {
+     // Ended after a sign, '.', 'e' or exponent sign: not a complete number.
+     fail(chunkEnd, "Unterminated number literal");
+   }
+   return end;
+ }
int parseNumber(int char, int position) {
@@ -487,89 +1144,576 @@ class _JsonParser {
// Format:
// '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)?
int start = position;
- int length = source.length;
- int intValue = 0; // Collect int value while parsing.
- int intSign = 1;
+ int length = chunkEnd;
+ // Collects an int value while parsing. Used for both an integer literal
+ // and the exponent part of a double literal.
+ int intValue = 0;
+ double doubleValue = 0.0; // Collect double value while parsing.
+ int sign = 1;
bool isDouble = false;
// Break this block when the end of the number literal is reached.
// At that time, position points to the next character, and isDouble
// is set if the literal contains a decimal point or an exponential.
parsing: {
if (char == MINUS) {
- intSign = -1;
+ sign = -1;
position++;
- if (position == length) fail(position, "Missing expected digit");
- char = source.codeUnitAt(position);
+ if (position == length) return beginChunkNumber(NUM_SIGN, start);
+ char = getChar(position);
}
- if (char < CHAR_0 || char > CHAR_9) {
- if (intSign < 0) {
+ int digit = char ^ CHAR_0;
+ if (digit > 9) {
+ if (sign < 0) {
fail(position, "Missing expected digit");
} else {
// If it doesn't even start out as a numeral.
fail(position, "Unexpected character");
}
}
- if (char == CHAR_0) {
+ if (digit == 0) {
position++;
- if (position == length) break parsing;
- char = source.codeUnitAt(position);
- if (CHAR_0 <= char && char <= CHAR_9) {
- fail(position);
- }
+ if (position == length) return beginChunkNumber(NUM_ZERO, start);
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ // If starting with zero, next character must not be digit.
+ if (digit <= 9) fail(position);
} else {
do {
- intValue = intValue * 10 + (char - CHAR_0);
+ intValue = 10 * intValue + digit;
position++;
- if (position == length) break parsing;
- char = source.codeUnitAt(position);
- } while (CHAR_0 <= char && char <= CHAR_9);
+ if (position == length) return beginChunkNumber(NUM_DIGIT, start);
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ } while (digit <= 9);
}
if (char == DECIMALPOINT) {
isDouble = true;
+ doubleValue = intValue.toDouble();
+ intValue = 0;
position++;
- if (position == length) fail(position, "Missing expected digit");
- char = source.codeUnitAt(position);
- if (char < CHAR_0 || char > CHAR_9) fail(position);
+ if (position == length) return beginChunkNumber(NUM_DOT, start);
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ if (digit > 9) fail(position);
do {
+ doubleValue = 10.0 * doubleValue + digit;
+ intValue -= 1;
position++;
- if (position == length) break parsing;
- char = source.codeUnitAt(position);
- } while (CHAR_0 <= char && char <= CHAR_9);
+ if (position == length) return beginChunkNumber(NUM_DOT_DIGIT, start);
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ } while (digit <= 9);
}
- if (char == CHAR_e || char == CHAR_E) {
- isDouble = true;
+ if ((char | 0x20) == CHAR_e) {
+ if (!isDouble) {
+ doubleValue = intValue.toDouble();
+ intValue = 0;
+ isDouble = true;
+ }
position++;
- if (position == length) fail(position, "Missing expected digit");
- char = source.codeUnitAt(position);
+ if (position == length) return beginChunkNumber(NUM_E, start);
+ char = getChar(position);
+ int expSign = 1;
+ int exponent = 0;
if (char == PLUS || char == MINUS) {
+ expSign = 0x2C - char; // -1 for MINUS, +1 for PLUS
position++;
- if (position == length) fail(position, "Missing expected digit");
- char = source.codeUnitAt(position);
+ if (position == length) return beginChunkNumber(NUM_E_SIGN, start);
+ char = getChar(position);
}
- if (char < CHAR_0 || char > CHAR_9) {
+ digit = char ^ CHAR_0;
+ if (digit > 9) {
fail(position, "Missing expected digit");
}
do {
+ exponent = 10 * exponent + digit;
position++;
- if (position == length) break parsing;
- char = source.codeUnitAt(position);
- } while (CHAR_0 <= char && char <= CHAR_9);
+ if (position == length) return beginChunkNumber(NUM_E_DIGIT, start);
+ char = getChar(position);
+ digit = char ^ CHAR_0;
+ } while (digit <= 9);
+ intValue += expSign * exponent;
}
}
if (!isDouble) {
- listener.handleNumber(intSign * intValue);
+ listener.handleNumber(sign * intValue);
return position;
}
- // This correctly creates -0.0 for doubles.
- listener.handleNumber(_parseDouble(source, start, position));
+ // Double values at or above this value (2**53) may have lost precision.
+ // Only trust results that are below this value.
+ const double maxExactDouble = 9007199254740992.0;
+ if (doubleValue < maxExactDouble) {
+ int exponent = intValue;
+ double signedMantissa = doubleValue * sign;
+ if (exponent >= -22) {
+ if (exponent < 0) {
+ listener.handleNumber(signedMantissa / POWERS_OF_TEN[-exponent]);
+ return position;
+ }
+ if (exponent == 0) {
+ listener.handleNumber(signedMantissa);
+ return position;
+ }
+ if (exponent <= 22) {
+ listener.handleNumber(signedMantissa * POWERS_OF_TEN[exponent]);
+ return position;
+ }
+ }
+ }
+ // If the value is outside the range +/-maxExactDouble or
+ // exponent is outside the range +/-22, then we can't trust simple double
+ // arithmetic to get the exact result, so we use the system double parsing.
+ listener.handleNumber(parseDouble(start, position));
return position;
}
- static double _parseDouble(String source, int start, int end)
- native "Double_parse";
+ /// Throws a [FormatException] for the current chunk at [position].
+ ///
+ /// Without a [message], the text is "Unexpected end of input" when
+ /// [position] equals [chunkEnd] and "Unexpected character" otherwise.
+ /// Never returns normally; the `int` return type lets callers write
+ /// `return fail(...)`.
+ int fail(int position, [String message]) {
+ if (message == null) {
+ message = (position == chunkEnd)
+ ? "Unexpected end of input"
+ : "Unexpected character";
+ }
+ throw new FormatException(message, chunk, position);
+ }
+}
+
+/**
+ * A [_ChunkedJsonParser] whose chunks are [String]s.
+ *
+ * Characters are read as code units of the current [chunk], and string
+ * values that need assembling are collected in a [StringBuffer].
+ */
+class _JsonStringParser extends _ChunkedJsonParser {
+ /// The chunk currently being parsed.
+ String chunk;
+ /// The position in [chunk] where the data to parse ends.
+ int chunkEnd;
+
+ _JsonStringParser(_JsonListener listener) : super(listener);
+
+ int getChar(int position) {
+ return chunk.codeUnitAt(position);
+ }
+
+ String getString(int start, int end) => chunk.substring(start, end);
+
+ void beginString() {
+ buffer = new StringBuffer();
+ }
+
+ void addSliceToString(int start, int end) {
+ StringBuffer stringBuffer = buffer;
+ stringBuffer.write(chunk.substring(start, end));
+ }
+
+ void addCharToString(int charCode) {
+ StringBuffer stringBuffer = buffer;
+ stringBuffer.writeCharCode(charCode);
+ }
+
+ String endString() {
+ StringBuffer stringBuffer = buffer;
+ buffer = null;
+ return stringBuffer.toString();
+ }
+
+ /// Copies the code units of [chunk] from [start] to [end] into [target],
+ /// beginning at index [offset].
+ void copyCharsToList(int start, int end, List target, int offset) {
+ for (int from = start, to = offset; from < end; from++, to++) {
+ target[to] = chunk.codeUnitAt(from);
+ }
+ }
+
+ double parseDouble(int start, int end) => _parseDouble(chunk, start, end);
+}
+
+patch class JsonDecoder {
+ /// Returns a [_JsonStringDecoderSink] that parses its input incrementally,
+ /// using this decoder's reviver, and emits the result to [sink].
+ /* patch */ StringConversionSink startChunkedConversion(Sink<Object> sink) =>
+ new _JsonStringDecoderSink(this._reviver, sink);
+}
+
+/**
+ * Sink that decodes chunked JSON string input into a single object.
+ *
+ * Chunks are fed to a [_JsonStringParser]. When the sink is closed, the
+ * parser's result is added to the output sink, which is then closed too.
+ */
+class _JsonStringDecoderSink extends StringConversionSinkBase {
+ _ChunkedJsonParser _parser;
+ Function _reviver;
+ final Sink<Object> _sink;
+
+ _JsonStringDecoderSink(reviver, this._sink)
+ : _reviver = reviver, _parser = _createParser(reviver);
+
+ /// Creates a string parser whose listener applies [reviver] if it is
+ /// non-null, and just builds the object otherwise.
+ static _ChunkedJsonParser _createParser(reviver) {
+ _BuildJsonListener listener = (reviver == null)
+ ? new _BuildJsonListener()
+ : new _ReviverJsonListener(reviver);
+ return new _JsonStringParser(listener);
+ }
+
+ /// Parses [chunk] from [start] to [end].
+ ///
+ /// When [isLast] is true only the parser is closed here; the result is
+ /// delivered to the output sink by [close].
+ void addSlice(String chunk, int start, int end, bool isLast) {
+ _parser
+ ..chunk = chunk
+ ..chunkEnd = end
+ ..parse(start);
+ if (isLast) _parser.close();
+ }
+
+ void add(String chunk) => addSlice(chunk, 0, chunk.length, false);
+
+ void close() {
+ _parser.close();
+ _sink
+ ..add(_parser.result)
+ ..close();
+ }
+
+ /// Drops the string parser and returns a sink that decodes UTF-8 bytes
+ /// with the same reviver and output sink.
+ Utf8ConversionSink asUtf8Sink(bool allowMalformed) {
+ _parser = null;
+ return new _JsonUtf8DecoderSink(_reviver, _sink, allowMalformed);
+ }
+}
+
+class _Utf8StringBuffer {
+ static const int INITIAL_CAPACITY = 32;
+ // Partial state encoding.
+ static const int MASK_TWO_BIT = 0x03;
+ static const int MASK_SIZE = MASK_TWO_BIT;
+ static const int SHIFT_MISSING = 2;
+ static const int SHIFT_VALUE = 4;
+ static const int NO_PARTIAL = 0;
+
+ // UTF-8 encoding and limits.
+ static const int MAX_ASCII = 127;
+ static const int MAX_TWO_BYTE = 0x7ff;
+ static const int MAX_THREE_BYTE = 0xffff;
+ static const int MAX_UNICODE = 0X10ffff;
+ static const int MASK_TWO_BYTE = 0x1f;
+ static const int MASK_THREE_BYTE = 0x0f;
+ static const int MASK_FOUR_BYTE = 0x07;
+ static const int MASK_CONTINUE_TAG = 0xC0;
+ static const int MASK_CONTINUE_VALUE = 0x3f;
+ static const int CONTINUE_TAG = 0x80;
+
+ // UTF-16 surrogate encoding.
+ static const int LEAD_SURROGATE = 0xD800;
+ static const int TAIL_SURROGATE = 0xDC00;
+ static const int SHIFT_HIGH_SURROGATE = 10;
+ static const int MASK_LOW_SURROGATE = 0x3ff;
+
+ // The internal buffer starts as Uint8List, but may change to Uint16List
+ // if the string contains non-Latin-1 characters.
+ List<int> buffer = new Uint8List(INITIAL_CAPACITY);
+ // Number of elements in buffer.
+ int length = 0;
+ // Partial decoding state, for cases where a UTF-8 sequence is split
+ // between chunks.
+ int partialState = NO_PARTIAL;
+ // Whether all characters so far have been Latin-1 (and the buffer is
+ // still a Uint8List). Set to false when the first non-Latin-1 character
+ // is encountered, and the buffer is then also converted to a Uint16List.
+ bool isLatin1 = true;
+ // If allowing malformed, invalid UTF-8 sequences are converted to
+ // U+FFFD.
+ bool allowMalformed;
+
+ _Utf8StringBuffer(this.allowMalformed);
+
+ /**
+ * Parse the continuation of a multi-byte UTF-8 sequence.
+ *
+ * Parse [utf8] from [position] to [end]. If the sequence extends beyond
+ * `end`, store the partial state in [partialState], and continue from there
+ * on the next added slice.
+ *
+ * The [size] is the number of expected continuation bytes total,
+ * and [missing] is the number of remaining continuation bytes.
+ * The [size] is used to detect overlong encodings.
+ * The [value] is the value collected so far.
+ *
+ * When called after seeing the first multi-byte marker, the [size] and
+ * [missing] values are always the same, but they may differ if continuing
+ * after a partial sequence.
+ *
+ * Returns the position after the last byte consumed. That is [end] when
+ * the input ran out mid-sequence, and the position of the offending byte
+ * (which is not consumed) when a non-continuation byte is replaced by
+ * U+FFFD because [allowMalformed] is true.
+ *
+ * Throws a [FormatException] on a non-continuation byte or an over-long
+ * encoding when [allowMalformed] is false.
+ */
+ int addContinuation(List<int> utf8, int position, int end,
+ int size, int missing, int value) {
+ int codeEnd = position + missing;
+ do {
+ if (position == end) {
+ // Out of input: pack size, remaining count and collected value into
+ // partialState so the next slice can resume.
+ missing = codeEnd - position;
+ partialState =
+ size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE);
+ return end;
+ }
+ int char = utf8[position];
+ if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) {
+ if (allowMalformed) {
+ // Replace the truncated sequence; the current byte is left for the
+ // caller to process.
+ addCharCode(0xFFFD);
+ return position;
+ }
+ throw new FormatException("Expected UTF-8 continuation byte, "
+ "found $char", utf8, position);
+ }
+ // Each continuation byte contributes six more bits.
+ value = 64 * value + (char & MASK_CONTINUE_VALUE);
+ position++;
+ } while (position < codeEnd);
+ // The value must be above the largest value encodable with fewer bytes.
+ if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) {
+ // Over-long encoding.
+ if (allowMalformed) {
+ value = 0xFFFD;
+ } else {
+ throw new FormatException(
+ "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}"
+ " encoded in ${size + 1} bytes.", utf8, position - 1);
+ }
+ }
+ addCharCode(value);
+ return position;
+ }
+
+ /// Appends the code point [char] to the buffer.
+ ///
+ /// Values up to 0xFFFF are stored as a single element; larger values are
+ /// stored as a UTF-16 surrogate pair. The buffer is switched to 16-bit
+ /// storage the first time a value above 0xFF is added.
+ ///
+ /// If a partial UTF-8 sequence is pending in [partialState], it is first
+ /// replaced by U+FFFD when [allowMalformed] is true; otherwise a
+ /// [FormatException] is thrown.
+ void addCharCode(int char) {
+ assert(char >= 0);
+ assert(char <= MAX_UNICODE);
+ if (partialState != NO_PARTIAL) {
+ if (allowMalformed) {
+ partialState = NO_PARTIAL;
+ addCharCode(0xFFFD);
+ } else {
+ // The bytes of the incomplete sequence are not in scope here, so the
+ // exception is thrown without a source.
+ throw new FormatException("Incomplete UTF-8 sequence");
+ }
+ }
+ if (isLatin1 && char > 0xff) {
+ _to16Bit(); // Also grows a little if close to full.
+ }
+ int length = this.length;
+ if (char <= MAX_THREE_BYTE) {
+ if (length == buffer.length) _grow();
+ buffer[length] = char;
+ this.length = length + 1;
+ return;
+ }
+ // Supplementary-plane code point: needs room for two UTF-16 code units.
+ if (length + 2 > buffer.length) _grow();
+ int bits = char - 0x10000;
+ buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE);
+ buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE);
+ this.length = length + 2;
+ }
+
+ /// Switches the storage from 8-bit to 16-bit elements.
+ ///
+ /// Reuses the existing backing store when it can hold the current
+ /// contents plus [INITIAL_CAPACITY] more elements at two bytes each;
+ /// otherwise allocates a new list with at least [INITIAL_CAPACITY] free
+ /// slots. Clears [isLatin1].
+ void _to16Bit() {
+ assert(isLatin1);
+ final int capacity = buffer.length;
+ Uint16List wide;
+ if (capacity >= (length + INITIAL_CAPACITY) * 2) {
+ // Reuse existing buffer if it's big enough.
+ wide = new Uint16List.view(buffer.buffer);
+ } else {
+ final int minimum = length + INITIAL_CAPACITY;
+ wide = new Uint16List(capacity < minimum ? minimum : capacity);
+ }
+ wide.setRange(0, length, buffer);
+ buffer = wide;
+ isLatin1 = false;
+ }
+
+ /// Doubles the capacity of [buffer], keeping the element width and the
+ /// first [length] elements.
+ void _grow() {
+ final int doubled = buffer.length * 2;
+ List bigger = isLatin1 ? new Uint8List(doubled) : new Uint16List(doubled);
+ bigger.setRange(0, length, buffer);
+ buffer = bigger;
+ }
+
+ /// Decodes the UTF-8 bytes of [utf8] from [position] to [end] and appends
+ /// the resulting characters to the buffer.
+ ///
+ /// A sequence left incomplete by a previous call is resumed first. A
+ /// sequence cut off at [end] is recorded in [partialState].
+ ///
+ /// Invalid bytes are replaced by U+FFFD when [allowMalformed] is true and
+ /// cause a [FormatException] otherwise.
+ void addSlice(List<int> utf8, int position, int end) {
+ assert(position < end);
+ if (partialState > 0) {
+ // Unpack the state stored by addContinuation and resume the sequence.
+ int continueByteCount = (partialState & MASK_TWO_BIT);
+ int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
+ int value = partialState >> SHIFT_VALUE;
+ partialState = NO_PARTIAL;
+ position = addContinuation(utf8, position, end,
+ continueByteCount, missing, value);
+ if (position == end) return;
+ }
+ // Keep index and capacity in local variables while looping over
+ // ASCII characters.
+ int index = length;
+ int capacity = buffer.length;
+ while (position < end) {
+ int char = utf8[position];
+ if (char <= MAX_ASCII) {
+ if (index == capacity) {
+ // _grow copies the first `length` elements, so publish index first.
+ length = index;
+ _grow();
+ capacity = buffer.length;
+ }
+ buffer[index++] = char;
+ position++;
+ continue;
+ }
+ // Non-ASCII: the helpers below read and update `length` directly.
+ length = index;
+ if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) {
+ if (allowMalformed) {
+ addCharCode(0xFFFD);
+ position++;
+ } else {
+ throw new FormatException("Unexepected UTF-8 continuation byte",
+ utf8, position);
+ }
+ } else if (char < 0xE0) { // C0-DF
+ // Two-byte.
+ position = addContinuation(utf8, position + 1, end, 1, 1,
+ char & MASK_TWO_BYTE);
+ } else if (char < 0xF0) { // E0-EF
+ // Three-byte.
+ position = addContinuation(utf8, position + 1, end, 2, 2,
+ char & MASK_THREE_BYTE);
+ } else if (char < 0xF8) { // F0-F7
+ // Four-byte.
+ position = addContinuation(utf8, position + 1, end, 3, 3,
+ char & MASK_FOUR_BYTE);
+ } else {
+ if (allowMalformed) {
+ addCharCode(0xFFFD);
+ position++;
+ } else {
+ throw new FormatException("Invalid UTF-8 byte: $char",
+ utf8, position);
+ }
+ }
+ // Reload the locals; the helpers may have grown or replaced the buffer.
+ index = length;
+ capacity = buffer.length;
+ }
+ length = index;
+ }
+
+ /// Returns the buffered characters as a [String].
+ ///
+ /// If a UTF-8 sequence is still incomplete, it is replaced by U+FFFD when
+ /// [allowMalformed] is true. Otherwise a [FormatException] is thrown whose
+ /// source is a byte list rebuilt from [partialState].
+ String toString() {
+ if (partialState != NO_PARTIAL) {
+ if (allowMalformed) {
+ partialState = NO_PARTIAL;
+ addCharCode(0xFFFD);
+ } else {
+ int continueByteCount = (partialState & MASK_TWO_BIT);
+ int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
+ int value = partialState >> SHIFT_VALUE;
+ // One lead byte plus the continuation bytes already consumed.
+ int seenByteCount = continueByteCount - missing + 1;
+ List source = new Uint8List(seenByteCount);
+ // Rebuild the continuation bytes from the collected value, last first.
+ while (seenByteCount > 1) {
+ seenByteCount--;
+ source[seenByteCount] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE);
+ value >>= 6;
+ }
+ // Rebuild the lead byte: the remaining value bits plus a tag derived
+ // from the sequence length (the Uint8List keeps only the low 8 bits).
+ source[0] = value | (0x3c0 >> (continueByteCount - 1));
+ throw new FormatException("Incomplete UTF-8 sequence",
+ source, source.length);
+ }
+ }
+ return new String.fromCharCodes(buffer, 0, length);
+ }
+}
+
+/**
+ * Chunked JSON parser that parses UTF-8 chunks.
+ *
+ * Characters are read as bytes of the current [chunk], and string values
+ * are decoded through a [_Utf8StringBuffer].
+ */
+class _JsonUtf8Parser extends _ChunkedJsonParser {
+ /// Passed on to every [_Utf8StringBuffer] this parser creates.
+ final bool allowMalformed;
+ /// The chunk of UTF-8 bytes currently being parsed.
+ List<int> chunk;
+ /// The position in [chunk] where the data to parse ends.
+ int chunkEnd;
+
+ _JsonUtf8Parser(_JsonListener listener, this.allowMalformed)
+ : super(listener);
+
+ int getChar(int position) => chunk[position];
+
+ /// Decodes the bytes of [chunk] from [start] to [end] into a string.
+ String getString(int start, int end) {
+ beginString();
+ addSliceToString(start, end);
+ String result = endString();
+ return result;
+ }
+
+ void beginString() {
+ this.buffer = new _Utf8StringBuffer(allowMalformed);
+ }
+
+ void addSliceToString(int start, int end) {
+ _Utf8StringBuffer buffer = this.buffer;
+ buffer.addSlice(chunk, start, end);
+ }
+
+ void addCharToString(int charCode) {
+ _Utf8StringBuffer buffer = this.buffer;
+ buffer.addCharCode(charCode);
+ }
+
+ String endString() {
+ _Utf8StringBuffer buffer = this.buffer;
+ this.buffer = null;
+ return buffer.toString();
+ }
+
+ /// Copies the bytes of [chunk] from [start] to [end] into [target],
+ /// beginning at index [offset].
+ void copyCharsToList(int start, int end, List target, int offset) {
+ int length = end - start;
+ target.setRange(offset, offset + length, chunk, start);
+ }
+
+ /// Parses the bytes of [chunk] from [start] to [end] as a double.
+ ///
+ /// The bytes are first decoded to a string, because [_parseDouble] takes
+ /// a [String] source.
+ double parseDouble(int start, int end) {
+ String string = getString(start, end);
+ return _parseDouble(string, 0, string.length);
+ }
+}
+
+/// Parses the characters of [source] from [start] to [end] as a double.
+///
+/// Implemented natively by the runtime ("Double_parse").
+double _parseDouble(String source, int start, int end)
+ native "Double_parse";
+
+/**
+ * Implements the chunked conversion from a UTF-8 encoding of JSON
+ * to its corresponding object.
+ *
+ * Byte chunks are fed to a [_JsonUtf8Parser]. When the sink is closed, the
+ * parser's result is added to the output sink, which is then closed too.
+ */
+class _JsonUtf8DecoderSink extends ByteConversionSinkBase {
+ // Typed as the parser base class, matching what [_createParser] returns.
+ _ChunkedJsonParser _parser;
+ final Sink<Object> _sink;
+
+ _JsonUtf8DecoderSink(reviver, this._sink, bool allowMalformed)
+ : _parser = _createParser(reviver, allowMalformed);
+
+ /// Creates a UTF-8 parser whose listener applies [reviver] if it is
+ /// non-null, and just builds the object otherwise.
+ static _ChunkedJsonParser _createParser(reviver, bool allowMalformed) {
+ _BuildJsonListener listener;
+ if (reviver == null) {
+ listener = new _BuildJsonListener();
+ } else {
+ listener = new _ReviverJsonListener(reviver);
+ }
+ return new _JsonUtf8Parser(listener, allowMalformed);
+ }
+
+ /// Parses [chunk] from [start] to [end], then closes this sink if
+ /// [isLast] is true.
+ void addSlice(List<int> chunk, int start, int end, bool isLast) {
+ _addChunk(chunk, start, end);
+ if (isLast) close();
+ }
+
+ void add(List<int> chunk) {
+ _addChunk(chunk, 0, chunk.length);
+ }
+
+ void _addChunk(List<int> chunk, int start, int end) {
+ _parser.chunk = chunk;
+ _parser.chunkEnd = end;
+ _parser.parse(start);
+ }
- void fail(int position, [String message]) {
- if (message == null) message = "Unexpected character";
- throw new FormatException(message, source, position);
+ /// Finishes parsing and delivers the decoded object to the output sink.
+ void close() {
+ _parser.close();
+ var decoded = _parser.result;
+ _sink.add(decoded);
+ _sink.close();
+ }
}
« no previous file with comments | « no previous file | runtime/lib/double_patch.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698