runtime/lib/convert_patch.dart - Issue 649113005: Make JSON parsing work as a chunked conversion sink.

Unified Diff: runtime/lib/convert_patch.dart

Issue 649113005: Make JSON parsing work as a chunked conversion sink. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Also add an UTF-8 base JSON parser, without intermediate string representations. Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: runtime/lib/convert_patch.dart

diff --git a/runtime/lib/convert_patch.dart b/runtime/lib/convert_patch.dart

index 2a4ab1bc8af658b484fc94e8726a2c2afc8f8627..e00db53811bc1d667bbe37b1b710506c086b3c77 100644

--- a/runtime/lib/convert_patch.dart

+++ b/runtime/lib/convert_patch.dart

@@ -1,7 +1,9 @@

// BSD-style license that can be found in the LICENSE file.

+import "dart:_internal" show POWERS_OF_TEN;

// JSON conversion.

patch _parseJson(String json, reviver(var key, var value)) {

@@ -11,7 +13,11 @@ patch _parseJson(String json, reviver(var key, var value)) {

} else {

listener = new _ReviverJsonListener(reviver);

}

- new _JsonParser(json, listener).parse();

+ var parser = new _JsonStringParser(listener);

+ parser.chunk = json;

+ parser.chunkEnd = json.length;

+ parser.parse(0);

+ parser.close();

return listener.result;

}

@@ -19,6 +25,9 @@ patch _parseJson(String json, reviver(var key, var value)) {

// Simple API for JSON parsing.

+/**

+ * Listener for parsing events from [_ChunkedJsonParser].

+ */

abstract class _JsonListener {

void handleString(String value) {}

void handleNumber(num value) {}

@@ -34,7 +43,7 @@ abstract class _JsonListener {

}

/**

- * A [JsonListener] that builds data objects from the parser events.

+ * A [_JsonListener] that builds data objects from the parser events.

* This is a simple stack-based object builder. It keeps the most recently

* seen value in a variable, and uses it depending on the following event.

@@ -135,7 +144,74 @@ class _ReviverJsonListener extends _BuildJsonListener {

}

-class _JsonParser {

+/**

+ * Buffer holding parts of a numeral.

+ *

+ * The buffer contains the characters of a JSON number.

+ * These are all ASCII, so an [Uint8List] is used as backing store.

+ *

+ * This buffer is used when a JSON number is split between separate chunks.

+ *

+ */

+class _NumberBuffer {

+ static const int kMinCapacity = 16;

floitsch 2014/10/20 08:52:44 constants in Dart don't start with "k". maybe they

Lasse Reichstein Nielsen 2014/10/27 12:42:32 I know. The style guide changed so they are no lon

+ static const int kDefaultOverhead = 5;

+ Uint8List list;

+ int length = 0;

+ _NumberBuffer(int initialCapacity)

+ : list = new Uint8List(_initialCapacity(initialCapacity));

+ int get capacity => list.length;

+ // Pick an initial capacity greater than the first part's size.

+ // The typical use case has two parts, this is the attempt at

+ // guessing the size of the second part without overdoing it.

+ // The default estimate of the second part is [kDefaultOverhead],

+ // then round to a multiple of four, and return the result,

+ // or [kMinCapacity] if that is greater.

+ static int _initialCapacity(int minCapacity) {

+ minCapacity += kDefaultOverhead;

+ if (minCapacity < kMinCapacity) return kMinCapacity;

+ minCapacity = (minCapacity + 3) & ~3; // Round to multile of four.

floitsch 2014/10/20 08:52:44 multiple

Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.

+ return minCapacity;

+ }

+ // Grows to the exact size asked for.

+ void ensureCapacity(int newCapcity) {

floitsch 2014/10/20 08:52:44 newCapacity

Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.

+ Uint8List list = this.list;

+ if (newCapcity <= list.length) return;

+ Uint8List newList = new Uint8List(newCapcity);

+ newList.setRange(0, list.length, list, 0);

+ this.list = newList;

+ }

+ String toString() => "NumberBuffer";

floitsch 2014/10/20 08:52:44 maybe add the contents? => "NumberBuffer(${getStr

Lasse Reichstein Nielsen 2014/10/27 12:42:32 I think I had that for debugging, but I'll just re

+  // Returns the buffered characters as a String.
+  // Only the first [length] entries of the backing [list] are decoded.
+  String getString() {
+    Uint8List chars = this.list;
+    if (length < chars.length) {
+      // The backing store has spare capacity; decode only the used prefix.
+      chars = new Uint8List.view(chars.buffer, 0, length);
+    }
+    return new String.fromCharCodes(chars);
+  }

+ // TODO(lrn): See if parsing of numbers can be abstracted to something

+ // not only working on strings, but also on char-code lists, without losing

+ // performance.

+ int parseInt() => int.parse(getString());

+ double parseDouble() => double.parse(getString());

+/**

+ * Chunked JSON parser.

+ *

+ * Receives inputs in chunks, gives access to individual parts of the input,

+ * and stores input state between chunks.

+ *

+ * Implementations include [String] and UTF-8 parsers.

+ */

+abstract class _ChunkedJsonParser {

// A simple non-recursive state-based parser for JSON.

// Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON

@@ -172,11 +248,11 @@ class _JsonParser {

static const int NO_VALUES = 12;

// Objects and arrays are "empty" until their first property/element.

+ // At this position, they may either have an entry or a close-bracket.

static const int EMPTY = 0;

static const int NON_EMPTY = 16;

static const int EMPTY_MASK = 16; // Empty if zero.

static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY;

// Actual states.

@@ -226,18 +302,394 @@ class _JsonParser {

static const int LBRACE = 0x7b;

static const int RBRACE = 0x7d;

- final String source;

+ // State of partial value at chunk split.

+ static const int NO_PARTIAL = 0;

+ static const int PARTIAL_STRING = 1;

+ static const int PARTIAL_NUMERAL = 2;

+ static const int PARTIAL_KEYWORD = 3;

+ static const int MASK_PARTIAL = 3;

+ // Partial states for numerals. Values can be |'ed with PARTIAL_NUMERAL.

+ static const int NUM_SIGN = 0; // After initial '-'.

+ static const int NUM_ZERO = 4; // After '0' as first digit.

+ static const int NUM_DIGIT = 8; // After digit, no '.' or 'e' seen.

+ static const int NUM_DOT = 12; // After '.'.

+ static const int NUM_DOT_DIGIT = 16; // After a decimal digit (after '.').

+ static const int NUM_E = 20; // After 'e' or 'E'.

+ static const int NUM_E_SIGN = 24; // After '-' or '+' after 'e' or 'E'.

+ static const int NUM_E_DIGIT = 28; // After exponent digit.

+ static const int NUM_SUCCESS = 32; // Never stored as partial state.

+ // Partial states for strings.

+ static const int STR_PLAIN = 0; // Inside string, but not escape.

+ static const int STR_ESCAPE = 4; // After '\'.

+ static const int STR_U = 16; // After '\u' and 0-3 hex digits.

+ static const int STR_U_COUNT_SHIFT = 2; // Hex digit count in bits 2-3.

+ static const int STR_U_VALUE_SHIFT = 5; // Hex digit value in bits 5+.

+ // Partial states for keywords.

+ static const int KWD_TYPE_MASK = 12;

+ static const int KWD_TYPE_SHIFT = 2;

+ static const int KWD_NULL = 0; // Prefix of "null" seen.

+ static const int KWD_TRUE = 4; // Prefix of "true" seen.

+ static const int KWD_FALSE = 8; // Prefix of "false" seen.

+ static const int KWD_COUNT_SHIFT = 4; // Prefix length in bits 4+.

+ // Mask used to mask off two lower bits.

+ static const int TWO_BIT_MASK = 3;

final _JsonListener listener;

- _JsonParser(this.source, this.listener);

+ // The current parsing state.

+ int state = STATE_INITIAL;

+ List<int> states = <int>[];

+ /**

+ * Stores tokenizer state between chunks.

+ *

+ * This state is stored when a chunk stops in the middle of a

+ * token (string, numeral, boolean or null).

+ *

+ * The partial state is used to continue parsing on the next chunk.

+ * The previous chunk is not retained, any data needed are stored in

+ * this integer, or in the [buffer] field as a string-building buffer

+ * or a [_NumberBuffer].

+ *

+ * Partial state is stored in [partialState] as bits.

+ *

+ * ..00 : No partial value (NO_PARTIAL).

+ *

+ * ..00001 : Partial string, not inside escape.

+ * ..00101 : Partial string, after '\'.

+ * ..vvvv1dd01 : Partial \u escape.

+ * The 'dd' bits (2-3) encode the number of hex digits seen.

+ * Bits 5-16 encode the value of the hex digits seen so far.

+ *

+ * ..0ddd10 : Partial numeral.

+ * The `ddd` bits store the parts of the numeral seen so

+ * far, as the constants `NUM_*` defined above.

+ * The characters of the numeral are stored in [buffer]

+ * as a [_NumberBuffer].

+ *

+ * ..0ddd0011 : Partial 'null' keyword.

+ * ..0ddd0111 : Partial 'true' keyword.

+ * ..0ddd1011 : Partial 'false' keyword.

+ * For all three keywords, the `ddd` bits encode the number

+ * of letters seen.

+ */

+ int partialState = NO_PARTIAL;

+ /**

+ * Extra data stored while parsing a primitive value.

+ * May be set during parsing, always set at chunk end if a value is partial.

+ *

+ * May contain a string buffer while parsing strings.

+ */

+ var buffer = null;

+ _ChunkedJsonParser(this.listener);

+ /**

+ * Push the current parse [state] on a stack.

+ *

+ * State is pushed when a new array or object literal starts,

+ * so the parser can go back to the correct value when the literal ends.

+ */

+  void saveState(int state) {
+    // Append to the end of the stack; [restoreState] removes from the same end.
+    this.states.add(state);
+  }

+ /**

+ * Restore a state pushed with [saveState].

+ */

+  int restoreState() {
+    // removeLast throws if no state has been saved.
+    int saved = states.removeLast();
+    return saved;
+  }

+ /**

+ * Finalizes the parsing.

+ *

+ * If the source ends in a number, it will be completed. Any other partial

+ * state is an error.

Søren Gjesse 2014/10/24 11:12:24 And the states stack is empty, right?

Lasse Reichstein Nielsen 2014/10/27 12:42:33 That's what the next paragraph tries to say. I'll

+ *

+ * Throws if the source read so far doesn't end up with a complete

+ * parsed value.

+ */

+ void close() {

+ if (partialState != NO_PARTIAL) {

+ int partialType = partialState & MASK_PARTIAL;

+ if (partialType == PARTIAL_NUMERAL) {

+ int numState = partialState & ~MASK_PARTIAL;

+ // A partial number might be a valid number if we know it's done.

+ // There is an unnecessary overhead if input is a single number,

+ // but this is assumed to be rare.

+ _NumberBuffer buffer = this.buffer;

+ this.buffer = null;

+ finishChunkNumber(numState, 0, 0, buffer);

+ } else if (partialType == PARTIAL_STRING) {

+ fail(chunkEnd, "Unterminate string");

Søren Gjesse 2014/10/24 11:12:24 Unterminated

Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.

+ } else {

+ assert(partialType == PARTIAL_KEYWORD);

+ fail(chunkEnd); // Incomplete literal.

+ }

+ if (state != STATE_END) {

+ fail(chunkEnd);

+ }

+ /**

+ * Read out the result after successfully closing the parser.

+ *

+ * The parser is closed by calling [close] or calling [addSourceChunk] with

+ * `true` as second (`isLast`) argument.

+ */

+  // Delegates to the listener, which holds the value built from parse events.
+  Object get result => listener.result;

+ // Sets the current source chunk.

floitsch 2014/10/20 08:52:44 Make all these comments dartdocs.

Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.

+ void set chunk(var source);

+ // Length of current chunk.

+ int get chunkEnd;

+ // Returns the chunk itself. Used by fail to include it in FormatException.

Søren Gjesse 2014/10/24 11:12:23 So the FormatException only have the chunk as the

Lasse Reichstein Nielsen 2014/10/27 12:42:32 Yes, that's all we have. The FormatException will

+ get chunk;

+ // Get character/code unit of current chunk.

+ int getChar(int index);

+ // Copy ASCII characters from start to end of chunk into a list.

+ // Used for number buffer (always copies ASCII, so encoding is not important).

+ void copyCharsToList(int start, int end, List<int> target);

+ // Build a string using input code units. Creates a string buffer

+ // and enables adding characters and slices to that buffer.

+ // The buffer is stored in [buffer]. If the string is unterminated,

+ // the same buffer is used to continue parsing in the next chunk.

+ void beginString();

+ // Add single character code to string being built.

+ void addCharToString(int charCode);

+ // Adds slice of current chunk to string being built.

floitsch 2014/10/20 08:52:44 end exclusive?

Lasse Reichstein Nielsen 2014/10/27 12:42:33 Acknowledged.

+ void addSliceToString(int start, int end);

+ // Finalizes the string being built and returns it as a String.

+ String endString();

+ // Extracts a literal string from a source slice.

Søren Gjesse 2014/10/24 11:12:23 source slice -> chunk slice

Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.

+ // No interpretation of the content is performed, except for converting

+ // the source format to string.

+ // This can be implemented more or less efficiently depending on the

+ // underlying source.

+ String getString(int start, int end);

+ // Parse a slice of input as an integer.

Søren Gjesse 2014/10/24 11:12:24 slice of input -> chunk slice

Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.

+ // The format is expected to be correct.

+  int parseInt(int start, int end) {
+    // Materialize the slice as a string and let the core parser handle it.
+    String digits = getString(start, end);
+    return int.parse(digits);
+  }

+ // Parse a slice of input as a double.

Søren Gjesse 2014/10/24 11:12:24 ditto.

Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.

+ // The format is expected to be correct.

+  double parseDouble(int start, int end) {
+    // Materialize the slice as a string and let the core parser handle it.
+    String numeral = getString(start, end);
+    return double.parse(numeral);
+  }

+  // Creates a _NumberBuffer containing the characters of the current chunk
+  // from [start] up to [chunkEnd].
+  //
+  // [start] must be a valid index into the chunk; this is only checked by
+  // assertions. The returned buffer has its `length` set to the number of
+  // characters copied.
+  //
+  // The return type is _NumberBuffer (not void): the method ends by
+  // returning the buffer it has just filled.
+  _NumberBuffer createNumberBuffer(int start) {
+    assert(start >= 0);
+    assert(start < chunkEnd);
+    int length = chunkEnd - start;
+    var buffer = new _NumberBuffer(length);
+    copyCharsToList(start, chunkEnd, buffer.list);
+    buffer.length = length;
+    return buffer;
+  }

+ /**

+ * Continues parsing a partial value.

+ */

+  int parsePartial(int position) {
+    // Nothing to consume in this chunk: leave the stored partial state as is.
+    if (position == chunkEnd) return position;
+    int stored = this.partialState;
+    assert(stored != NO_PARTIAL);
+    // The low two bits select the token kind; the remaining bits are
+    // kind-specific data passed on to the sub-parser.
+    int kind = stored & MASK_PARTIAL;
+    int detail = stored & ~MASK_PARTIAL;
+    // Clear the field before dispatching.
+    this.partialState = NO_PARTIAL;
+    assert(kind != 0);
+    switch (kind) {
+      case PARTIAL_STRING:
+        return parsePartialString(position, detail);
+      case PARTIAL_NUMERAL:
+        return parsePartialNumber(position, detail);
+      case PARTIAL_KEYWORD:
+        return parsePartialKeyword(position, detail);
+    }
+    return position;
+  }

+ // Parses the remainder of a number into the number buffer,

+ // checking syntax as it goes.

+ // Starts at [position], and returns the index of the first

Søren Gjesse 2014/10/24 11:12:24 chunk index 0 -> current chunk index?

Lasse Reichstein Nielsen 2014/10/27 12:42:33 At [position] actually.

+ // non-digit character found, or chunkEnd if the entire chunk is

+ // used.

+ // Throws if a syntax error is detected.

+ int parsePartialNumber(int position, int state) {

+ int start = position;

+ // Primitive implementation, can be optimized.

+ _NumberBuffer buffer = this.buffer;

+ this.buffer = null;

+ int end = chunkEnd;

+ toBailout: {

+ if (position == end) break toBailout;

+ int char = getChar(position);

+ int digit = char ^ CHAR_0;

+ if (state == NUM_SIGN) {

+ if (digit <= 9) {

+ if (digit == 0) {

+ state = NUM_ZERO;

+ } else {

+ state = NUM_DIGIT;

+ }

+ position++;

+ if (position == end) break toBailout;

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ } else {

+ return fail(position);

+ }

+ if (state == NUM_ZERO) {

+ if (digit <= 9) return fail(position);

floitsch 2014/10/20 08:52:44 Add comment, why this is not allowed.

Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.

+ state = NUM_DIGIT;

+ }

+ while (state == NUM_DIGIT) {

+ if (digit > 9) {

+ if (char == DECIMALPOINT) {

+ state = NUM_DOT;

+ } else if ((char | 0x20) == CHAR_e) {

+ state = NUM_E;

+ } else {

+ finishChunkNumber(state, start, position, buffer);

+ return position;

+ }

+ position++;

+ if (position == end) break toBailout;

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ }

+ if (state == NUM_DOT) {

+ if (digit > 9) return fail(position);

+ state = NUM_DOT_DIGIT;

+ }

+ while (state == NUM_DOT_DIGIT) {

+ if (digit > 9) {

+ if ((char | 0x20) == CHAR_e) {

+ state = NUM_E;

+ } else {

+ finishChunkNumber(state, start, position, buffer);

+ return position;

+ }

+ position++;

+ if (position == end) break toBailout;

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ }

+ if (state == NUM_E) {

+ if (char == PLUS || char == MINUS) {

+ state = NUM_E_SIGN;

+ position++;

+ if (position == end) break toBailout;

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ }

+ assert(state >= NUM_E);

+ while (digit <= 9) {

+ state = NUM_E_DIGIT;

+ position++;

+ if (position == end) break toBailout;

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ }

+ finishChunkNumber(state, start, position, buffer);

+ return position;

+ }

+ // Bailout code in case the current chunk ends while parsing the numeral.

+ assert(position == end);

+ continueChunkNumber(state, start, buffer);

+ return chunkEnd;

+ }

+  // Resumes a string that was split across chunks.
+  //
+  // [partialState] is one of STR_PLAIN, STR_ESCAPE, or an STR_U state
+  // carrying the count and value of the hex digits already seen.
+  // Returns the position after the consumed input.
+  int parsePartialString(int position, int partialState) {
+    if (partialState == STR_PLAIN) return parseStringToBuffer(position);
+    if (partialState == STR_ESCAPE) {
+      int next = parseStringEscape(position);
+      // parseStringEscape sets partialState if it sees the end.
+      return (next == chunkEnd) ? next : parseStringToBuffer(next);
+    }
+    assert((partialState & STR_U) != 0);
+    // Unpack the digit count and accumulated value of the split \u escape.
+    int seen = (partialState >> STR_U_COUNT_SHIFT) & TWO_BIT_MASK;
+    int codeUnit = partialState >> STR_U_VALUE_SHIFT;
+    while (seen < 4) {
+      if (position == chunkEnd) return chunkStringEscapeU(seen, codeUnit);
+      int digit = parseHexDigit(getChar(position));
+      if (digit < 0) fail(position, "Invalid hex digit");
+      codeUnit = 16 * codeUnit + digit;
+      seen++;
+      position++;
+    }
+    addCharToString(codeUnit);
+    return parseStringToBuffer(position);
+  }

+  // Resumes a keyword ("null", "true" or "false") split across chunks.
+  //
+  // [partialState] holds the keyword type (KWD_* bits) and the number of
+  // letters already matched. Reports the value to the listener once the
+  // keyword is complete, or stores a new partial state if the chunk ends first.
+  int parsePartialKeyword(int position, int partialState) {
+    int keywordType = partialState & KWD_TYPE_MASK;
+    int matched = partialState >> KWD_COUNT_SHIFT;
+    String keyword =
+        const ["null", "true", "false"][keywordType >> KWD_TYPE_SHIFT];
+    assert(matched < keyword.length);
+    while (true) {
+      if (position == chunkEnd) {
+        // Still incomplete: remember how far we got.
+        this.partialState =
+            PARTIAL_KEYWORD | keywordType | (matched << KWD_COUNT_SHIFT);
+        return chunkEnd;
+      }
+      int expected = keyword.codeUnitAt(matched);
+      if (getChar(position) != expected) return fail(position);
+      position++;
+      matched++;
+      if (matched >= keyword.length) break;
+    }
+    if (keywordType == KWD_NULL) {
+      listener.handleNull();
+    } else {
+      listener.handleBool(keywordType == KWD_TRUE);
+    }
+    return position;
+  }

+  // Returns the value (0..15) of the hex digit [char], or -1 if [char]
+  // is not a hexadecimal digit. Accepts both upper- and lower-case letters.
+  int parseHexDigit(int char) {
+    // '0'..'9' are 0x30..0x39, so xor with 0x30 maps them to 0..9.
+    int decimal = char ^ 0x30;
+    if (decimal <= 9) return decimal;
+    // Setting bit 0x20 folds upper case onto lower case; 'a'..'f' become 1..6.
+    int letter = (char | 0x20) ^ 0x60;
+    return (letter > 0 && letter <= 6) ? letter + 9 : -1;
+  }

/** Parses [source], or throws if it fails. */

- void parse() {

- final List<int> states = <int>[];

- int state = STATE_INITIAL;

- int position = 0;

- int length = source.length;

+ void parse(int position) {

+ int length = chunkEnd;

+ if (partialState != NO_PARTIAL) {

+ position = parsePartial(position);

+ if (position == length) return;

+ }

+ int state = this.state;

while (position < length) {

- int char = source.codeUnitAt(position);

+ int char = getChar(position);

switch (char) {

case SPACE:

case CARRIAGE_RETURN:

@@ -246,41 +698,41 @@ class _JsonParser {

position++;

break;

case QUOTE:

- if ((state & ALLOW_STRING_MASK) != 0) fail(position);

- position = parseString(position + 1);

+ if ((state & ALLOW_STRING_MASK) != 0) return fail(position);

state |= VALUE_READ_BITS;

+ position = parseString(position + 1);

break;

case LBRACKET:

- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);

+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

listener.beginArray();

- states.add(state);

+ saveState(state);

state = STATE_ARRAY_EMPTY;

position++;

break;

case LBRACE:

- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);

+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

listener.beginObject();

- states.add(state);

+ saveState(state);

state = STATE_OBJECT_EMPTY;

position++;

break;

case CHAR_n:

- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);

- position = parseNull(position);

+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

state |= VALUE_READ_BITS;

+ position = parseNull(position);

break;

case CHAR_f:

- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);

- position = parseFalse(position);

+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

state |= VALUE_READ_BITS;

+ position = parseFalse(position);

break;

case CHAR_t:

- if ((state & ALLOW_VALUE_MASK) != 0) fail(position);

- position = parseTrue(position);

+ if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);

state |= VALUE_READ_BITS;

+ position = parseTrue(position);

break;

case COLON:

- if (state != STATE_OBJECT_KEY) fail(position);

+ if (state != STATE_OBJECT_KEY) return fail(position);

listener.propertyName();

state = STATE_OBJECT_COLON;

position++;

@@ -295,7 +747,7 @@ class _JsonParser {

state = STATE_ARRAY_COMMA;

position++;

} else {

- fail(position);

+ return fail(position);

}

break;

case RBRACKET:

@@ -305,9 +757,9 @@ class _JsonParser {

listener.arrayElement();

listener.endArray();

} else {

- fail(position);

+ return fail(position);

}

- state = states.removeLast() | VALUE_READ_BITS;

+ state = restoreState() | VALUE_READ_BITS;

position++;

break;

case RBRACE:

@@ -317,19 +769,19 @@ class _JsonParser {

listener.propertyValue();

listener.endObject();

} else {

- fail(position);

+ return fail(position);

}

- state = states.removeLast() | VALUE_READ_BITS;

+ state = restoreState() | VALUE_READ_BITS;

position++;

break;

default:

if ((state & ALLOW_VALUE_MASK) != 0) fail(position);

- position = parseNumber(char, position);

state |= VALUE_READ_BITS;

+ position = parseNumber(char, position);

break;

}

- if (state != STATE_END) fail(position);

+ this.state = state;

}

/**

@@ -338,12 +790,14 @@ class _JsonParser {

* [:source[position]:] must be "t".

int parseTrue(int position) {

- assert(source.codeUnitAt(position) == CHAR_t);

- if (source.length < position + 4) fail(position, "Unexpected identifier");

- if (source.codeUnitAt(position + 1) != CHAR_r ||

- source.codeUnitAt(position + 2) != CHAR_u ||

- source.codeUnitAt(position + 3) != CHAR_e) {

- fail(position);

+ assert(getChar(position) == CHAR_t);

+ if (chunkEnd < position + 4) {

+ return parseKeywordPrefix(position, "true", KWD_TRUE);

+ }

+ if (getChar(position + 1) != CHAR_r ||

+ getChar(position + 2) != CHAR_u ||

+ getChar(position + 3) != CHAR_e) {

+ return fail(position);

}

listener.handleBool(true);

return position + 4;

@@ -355,13 +809,15 @@ class _JsonParser {

* [:source[position]:] must be "f".

int parseFalse(int position) {

- assert(source.codeUnitAt(position) == CHAR_f);

- if (source.length < position + 5) fail(position, "Unexpected identifier");

- if (source.codeUnitAt(position + 1) != CHAR_a ||

- source.codeUnitAt(position + 2) != CHAR_l ||

- source.codeUnitAt(position + 3) != CHAR_s ||

- source.codeUnitAt(position + 4) != CHAR_e) {

- fail(position);

+ assert(getChar(position) == CHAR_f);

+ if (chunkEnd < position + 5) {

+ return parseKeywordPrefix(position, "false", KWD_FALSE);

+ }

+ if (getChar(position + 1) != CHAR_a ||

+ getChar(position + 2) != CHAR_l ||

+ getChar(position + 3) != CHAR_s ||

+ getChar(position + 4) != CHAR_e) {

+ return fail(position);

}

listener.handleBool(false);

return position + 5;

@@ -373,17 +829,33 @@ class _JsonParser {

* [:source[position]:] must be "n".

int parseNull(int position) {

- assert(source.codeUnitAt(position) == CHAR_n);

- if (source.length < position + 4) fail(position, "Unexpected identifier");

- if (source.codeUnitAt(position + 1) != CHAR_u ||

- source.codeUnitAt(position + 2) != CHAR_l ||

- source.codeUnitAt(position + 3) != CHAR_l) {

- fail(position);

+ assert(getChar(position) == CHAR_n);

+ if (chunkEnd < position + 4) {

+ return parseKeywordPrefix(position, "null", KWD_NULL);

+ }

+ if (getChar(position + 1) != CHAR_u ||

+ getChar(position + 2) != CHAR_l ||

+ getChar(position + 3) != CHAR_l) {

+ return fail(position);

}

listener.handleNull();

return position + 4;

}

+  // Matches the rest of the current chunk against the keyword [chars],
+  // whose first letter is at [position], and records the matched prefix
+  // as a partial keyword of the given [type].
+  //
+  // Returns the chunk end, or the result of `fail` on a mismatch.
+  int parseKeywordPrefix(int position, String chars, int type) {
+    assert(getChar(position) == chars.codeUnitAt(0));
+    int end = chunkEnd;
+    int matched = 1;
+    for (int index = position + 1; index < end; index++) {
+      int char = getChar(index);
+      // Errors are reported at the start of the keyword.
+      if (char != chars.codeUnitAt(matched)) return fail(position);
+      matched++;
+    }
+    this.partialState = PARTIAL_KEYWORD | type | (matched << KWD_COUNT_SHIFT);
+    return end;
+  }

/**

* Parses a string value.

@@ -394,92 +866,172 @@ class _JsonParser {

// Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"'

// Initial position is right after first '"'.

int start = position;

- while (position < source.length) {

- int char = source.codeUnitAt(position++);

+ int end = chunkEnd;

+ while (position < end) {

+ int char = getChar(position++);

// BACKSLASH is larger than QUOTE and SPACE.

if (char > BACKSLASH) {

continue;

}

if (char == BACKSLASH) {

- return parseStringWithEscapes(start, position - 1);

+ beginString();

+ addSliceToString(start, position - 1);

+ return parseStringToBuffer(position - 1);

}

if (char == QUOTE) {

- listener.handleString(source.substring(start, position - 1));

+ listener.handleString(getString(start, position - 1));

return position;

}

if (char < SPACE) {

fail(position - 1, "Control character in string");

}

- fail(start - 1, "Unterminated string");

+ beginString();

+ addSliceToString(start, end);

+ return chunkString(STR_PLAIN);

}

- int parseStringWithEscapes(start, position) {

- // Backslash escape detected. Collect character codes for rest of string.

- int firstEscape = position;

- List<int> chars = <int>[];

- for (int i = start; i < firstEscape; i++) {

- chars.add(source.codeUnitAt(i));

- }

- position++;

+  // Records that the chunk ended inside a string, in sub-state [stringState],
+  // and returns the chunk end.
+  int chunkString(int stringState) {
+    this.partialState = stringState | PARTIAL_STRING;
+    return chunkEnd;
+  }

+  // Records that the chunk ended inside a \u escape after [count] hex digits
+  // with accumulated [value], and returns the chunk end.
+  int chunkStringEscapeU(int count, int value) {
+    int countBits = count << STR_U_COUNT_SHIFT;
+    int valueBits = value << STR_U_VALUE_SHIFT;
+    this.partialState = PARTIAL_STRING | STR_U | countBits | valueBits;
+    return chunkEnd;
+  }

+ int parseStringToBuffer(position) {

+ int end = chunkEnd;

+ int start = position;

while (true) {

- if (position == source.length) {

- fail(start - 1, "Unterminated string");

+ if (position == end) {

+ if (position > start) {

+ addSliceToString(start, position);

+ }

+ return chunkString(STR_PLAIN);

}

- int char = source.codeUnitAt(position);

- switch (char) {

- case CHAR_b: char = BACKSPACE; break;

- case CHAR_f: char = FORM_FEED; break;

- case CHAR_n: char = NEWLINE; break;

- case CHAR_r: char = CARRIAGE_RETURN; break;

- case CHAR_t: char = TAB; break;

- case SLASH:

- case BACKSLASH:

- case QUOTE:

- break;

- case CHAR_u:

- int hexStart = position - 1;

- int value = 0;

- for (int i = 0; i < 4; i++) {

- position++;

- if (position == source.length) {

- fail(start - 1, "Unterminated string");

- }

- char = source.codeUnitAt(position);

- char -= 0x30;

- if (char < 0) fail(hexStart, "Invalid unicode escape");

- if (char < 10) {

- value = value * 16 + char;

- } else {

- char = (char | 0x20) - 0x31;

- if (char < 0 || char > 5) {

- fail(hexStart, "Invalid unicode escape");

- }

- value = value * 16 + char + 10;

- }

- char = value;

- break;

- default:

- if (char < SPACE) fail(position, "Control character in string");

- fail(position, "Unrecognized string escape");

+ int char = getChar(position++);

+ if (char > BACKSLASH) continue;

+ if (char < SPACE) {

+ fail(position - 1); // Control character in string.

+ return;

}

- do {

- chars.add(char);

- position++;

- if (position == source.length) fail(start - 1, "Unterminated string");

- char = source.codeUnitAt(position);

- if (char == QUOTE) {

- String result = new String.fromCharCodes(chars);

- listener.handleString(result);

- return position + 1;

+ if (char == QUOTE) {

+ int quotePosition = position - 1;

+ if (quotePosition > start) {

+ addSliceToString(start, quotePosition);

}

- if (char < SPACE) {

- fail(position, "Control character in string");

+ listener.handleString(endString());

+ return position;

+ }

+ if (char != BACKSLASH) {

+ continue;

+ }

+ // Handle escape.

+ if (position - 1 > start) {

+ addSliceToString(start, position - 1);

+ }

+ if (position == end) return chunkString(STR_ESCAPE);

+ position = parseStringEscape(position);

+ if (position == end) return position;

+ start = position;

+ }

+ return -1; // UNREACHABLE.

+ }

+ int parseStringEscape(int position) {

Søren Gjesse 2014/10/24 11:12:24 Add a comment that position is just after the back

Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.

+ int char = getChar(position++);

+ int length = chunkEnd;

+ switch (char) {

+ case CHAR_b: char = BACKSPACE; break;

+ case CHAR_f: char = FORM_FEED; break;

+ case CHAR_n: char = NEWLINE; break;

+ case CHAR_r: char = CARRIAGE_RETURN; break;

+ case CHAR_t: char = TAB; break;

+ case SLASH:

+ case BACKSLASH:

+ case QUOTE:

+ break;

+ case CHAR_u:

+ int hexStart = position - 1;

+ int value = 0;

+ for (int i = 0; i < 4; i++) {

+ if (position == length) return chunkStringEscapeU(i, value);

+ char = getChar(position++);

+ int digit = char ^ 0x30;

+ value *= 16;

+ if (digit <= 9) {

+ value += digit;

+ } else {

+ digit = (char | 0x20) - CHAR_a;

+ if (digit < 0 || digit > 5) {

+ return fail(hexStart, "Invalid unicode escape");

+ }

+ value += digit + 10;

+ }

}

- } while (char != BACKSLASH);

- position++;

+ char = value;

+ break;

+ default:

+ if (char < SPACE) return fail(position, "Control character in string");

+ return fail(position, "Unrecognized string escape");

+ }

+ addCharToString(char);

+ if (position == length) return chunkString(STR_PLAIN);

+ return position;

+ }

+ /// Sets up a partial numeral state.

+ /// Returns chunkEnd to allow easy one-line bailout tests.

+  int beginChunkNumber(int state, int start) {
+    int end = chunkEnd;
+    int count = end - start;
+    // Copy the numeral's characters seen so far into a fresh buffer.
+    _NumberBuffer numberBuffer = new _NumberBuffer(count);
+    copyCharsToList(start, end, numberBuffer.list, 0);
+    numberBuffer.length = count;
+    this.buffer = numberBuffer;
+    this.partialState = state | PARTIAL_NUMERAL;
+    return end;
+  }

+  // Appends the chunk characters from [start] to [end] to [buffer],
+  // growing it so that [overhead] spare entries remain after the new content.
+  void addNumberChunk(_NumberBuffer buffer, int start, int end, int overhead) {
+    int used = buffer.length;
+    int total = used + (end - start);
+    buffer.ensureCapacity(total + overhead);
+    copyCharsToList(start, end, buffer.list, used);
+    buffer.length = total;
+  }

+ // Continues an already chunked number across an entire chunk.

+  int continueChunkNumber(int state, int start, _NumberBuffer buffer) {
+    int chunkLimit = chunkEnd;
+    // Append everything from [start] to the chunk end, leaving the default
+    // spare room in the buffer.
+    addNumberChunk(buffer, start, chunkLimit, _NumberBuffer.kDefaultOverhead);
+    this.buffer = buffer;
+    this.partialState = state | PARTIAL_NUMERAL;
+    return chunkLimit;
+  }

+  // Completes a numeral that was split across chunks and reports it to the
+  // listener.
+  //
+  // [state] is the NUM_* state reached when the numeral ended. If
+  // [end] > [start], the chunk characters in that range are appended to
+  // [buffer] first. A NUM_DIGIT state is parsed as an integer, NUM_DOT_DIGIT
+  // and NUM_E_DIGIT as a double; any other state other than NUM_ZERO is
+  // reported through `fail` as an unterminated number literal.
+  //
+  // Returns [end] on every path, including the NUM_ZERO shortcut, so the
+  // declared `int` return type is honoured.
+  int finishChunkNumber(int state, int start, int end, _NumberBuffer buffer) {
+    if (state == NUM_ZERO) {
+      // The value is zero; no need to read the buffer.
+      listener.handleNumber(0);
+      return end;
     }
+    if (end > start) {
+      addNumberChunk(buffer, start, end, 0);
+    }
+    if (state == NUM_DIGIT) {
+      listener.handleNumber(buffer.parseInt());
+    } else if (state == NUM_DOT_DIGIT || state == NUM_E_DIGIT) {
+      listener.handleNumber(buffer.parseDouble());
+    } else {
+      fail(chunkEnd, "Unterminated number literal");
+    }
+    return end;
   }

int parseNumber(int char, int position) {

@@ -487,89 +1039,544 @@ class _JsonParser {

// Format:

// '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)?

int start = position;

- int length = source.length;

- int intValue = 0; // Collect int value while parsing.

- int intSign = 1;

+ int length = chunkEnd;

+ int intValue = 0; // Collect int value while parsing.

+ double doubleValue = 0.0; // Collect double value while parsing.

+ int sign = 1;

bool isDouble = false;

// Break this block when the end of the number literal is reached.

// At that time, position points to the next character, and isDouble

// is set if the literal contains a decimal point or an exponential.

parsing: {

if (char == MINUS) {

- intSign = -1;

+ sign = -1;

position++;

- if (position == length) fail(position, "Missing expected digit");

- char = source.codeUnitAt(position);

+ if (position == length) return beginChunkNumber(NUM_SIGN, start);

+ char = getChar(position);

}

- if (char < CHAR_0 || char > CHAR_9) {

- if (intSign < 0) {

+ int digit = char ^ CHAR_0;

+ if (digit > 9) {

+ if (sign < 0) {

fail(position, "Missing expected digit");

} else {

// If it doesn't even start out as a numeral.

fail(position, "Unexpected character");

}

- if (char == CHAR_0) {

+ if (digit == 0) {

position++;

- if (position == length) break parsing;

- char = source.codeUnitAt(position);

- if (CHAR_0 <= char && char <= CHAR_9) {

- fail(position);

- }

+ if (position == length) return beginChunkNumber(NUM_ZERO, start);

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ // If starting with zero, next character must not be digit.

+ if (digit <= 9) fail(position);

} else {

do {

- intValue = intValue * 10 + (char - CHAR_0);

+ intValue = 10 * intValue + digit;

position++;

- if (position == length) break parsing;

- char = source.codeUnitAt(position);

- } while (CHAR_0 <= char && char <= CHAR_9);

+ if (position == length) return beginChunkNumber(NUM_DIGIT, start);

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ } while (digit <= 9);

}

if (char == DECIMALPOINT) {

isDouble = true;

+ doubleValue = intValue.toDouble();

+ intValue = 0;

position++;

- if (position == length) fail(position, "Missing expected digit");

- char = source.codeUnitAt(position);

- if (char < CHAR_0 || char > CHAR_9) fail(position);

+ if (position == length) return beginChunkNumber(NUM_DOT, start);

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ if (digit > 9) fail(position);

do {

+ doubleValue = 10.0 * doubleValue + digit;

+ intValue -= 1;

floitsch 2014/10/20 08:52:44 Don't reuse "intValue". Afaics this has nothing to

Lasse Reichstein Nielsen 2014/10/27 12:42:32 It is collecting *an* integer value. I'll just do

position++;

- if (position == length) break parsing;

- char = source.codeUnitAt(position);

- } while (CHAR_0 <= char && char <= CHAR_9);

+ if (position == length) return beginChunkNumber(NUM_DOT_DIGIT, start);

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ } while (digit <= 9);

}

- if (char == CHAR_e || char == CHAR_E) {

- isDouble = true;

+ if ((char | 0x20) == CHAR_e) {

+ if (!isDouble) {

+ doubleValue = intValue.toDouble();

+ intValue = 0;

floitsch 2014/10/20 08:52:44 ditto.

Lasse Reichstein Nielsen 2014/10/27 12:42:32 Same.

+ isDouble = true;

+ }

position++;

- if (position == length) fail(position, "Missing expected digit");

- char = source.codeUnitAt(position);

+ if (position == length) return beginChunkNumber(NUM_E, start);

+ char = getChar(position);

+ int expSign = 1;

+ int exponent = 0;

if (char == PLUS || char == MINUS) {

+ expSign = 0x2C - char; // -1 for MINUS, +1 for PLUS

position++;

- if (position == length) fail(position, "Missing expected digit");

- char = source.codeUnitAt(position);

+ if (position == length) return beginChunkNumber(NUM_E_SIGN, start);

+ char = getChar(position);

}

- if (char < CHAR_0 || char > CHAR_9) {

+ digit = char ^ CHAR_0;

+ if (digit > 9) {

fail(position, "Missing expected digit");

}

do {

+ exponent = 10 * exponent + digit;

position++;

- if (position == length) break parsing;

- char = source.codeUnitAt(position);

- } while (CHAR_0 <= char && char <= CHAR_9);

+ if (position == length) return beginChunkNumber(NUM_E_DIGIT, start);

+ char = getChar(position);

+ digit = char ^ CHAR_0;

+ } while (digit <= 9);

+ intValue += expSign * exponent;

}

if (!isDouble) {

- listener.handleNumber(intSign * intValue);

+ listener.handleNumber(sign * intValue);

return position;

}

- // This correctly creates -0.0 for doubles.

- listener.handleNumber(_parseDouble(source, start, position));

+ const double maxExactDouble = 9007199254740992.0;

floitsch 2014/10/20 08:52:44 comment.

Lasse Reichstein Nielsen 2014/10/27 12:42:32 Done.

+ if (doubleValue < maxExactDouble) {

+ int exponent = intValue;

+ double signedMantissa = doubleValue * sign;

+ if (exponent >= -22) {

+ if (exponent < 0) {

+ listener.handleNumber(signedMantissa / POWERS_OF_TEN[-exponent]);

+ return position;

+ }

+ if (exponent == 0) {

+ listener.handleNumber(signedMantissa);

+ return position;

+ }

+ if (exponent <= 22) {

+ listener.handleNumber(signedMantissa * POWERS_OF_TEN[exponent]);

+ return position;

+ }

+ listener.handleNumber(parseDouble(start, position));

floitsch 2014/10/20 08:52:44 comment.

Lasse Reichstein Nielsen 2014/10/27 12:42:33 Done.

+ return position;

+ }

+ int fail(int position, [String message]) { // Always throws a FormatException for [position] in the current chunk; it never returns normally.

+ if (message == null) {

+ message = "Unexpected character"; // Default message when the caller supplies none.

+ if (position == chunkEnd) message = "Unexpected end of input"; // The failure position is the end of the chunk.

+ }

+ throw new FormatException(message, chunk, position);

+ }

+/**

+ * Chunked JSON parser that parses [String] chunks.

+ *

+ * The current chunk is held in [chunk] and [chunkEnd] is the end position

+ * within it. Characters are read as UTF-16 code units, and string values

+ * that need buffering are accumulated in a [StringBuffer].

+ */

+class _JsonStringParser extends _ChunkedJsonParser {

+ String chunk;

+ int chunkEnd;

+ _JsonStringParser(_JsonListener listener) : super(listener);

+ int getChar(int position) => chunk.codeUnitAt(position); // Code unit at [position] of the current chunk.

+ String getString(int start, int end) {

+ return chunk.substring(start, end);

+ }

+ void beginString() { // Starts a fresh string accumulation.

+ this.buffer = new StringBuffer();

+ }

+ void addSliceToString(int start, int end) { // Appends chunk characters from [start] to [end] to the accumulated string.

+ StringBuffer buffer = this.buffer;

+ buffer.write(chunk.substring(start, end));

+ }

+ void addCharToString(int charCode) { // Appends a single character code to the accumulated string.

+ StringBuffer buffer = this.buffer;

+ buffer.writeCharCode(charCode);

+ }

+ String endString() { // Returns the accumulated string and clears the buffer field.

+ StringBuffer buffer = this.buffer;

+ this.buffer = null;

+ return buffer.toString();

+ }

+ void copyCharsToList(int start, int end, List target, int offset) { // Copies the code units from [start] to [end] into [target] starting at [offset].

+ int length = end - start;

+ for (int i = 0; i < length; i++) {

+ target[offset + i] = chunk.codeUnitAt(start + i);

+ }

+ double parseDouble(int start, int end) { // Delegates to the native double parser on the chunk itself.

+ return _parseDouble(chunk, start, end);

+ }

+patch class JsonDecoder {

+ /* patch */ StringConversionSink startChunkedConversion(Sink<Object> sink) { // Returns a sink that parses the chunks added to it as JSON.

+ return new _JsonStringDecoderSink(this._reviver, sink); // The decoder's reviver is handed on to the sink.

+ }

+/**

+ * Implements the chunked conversion from a JSON string to its corresponding

+ * object.

+ *

+ * The sink only creates one object, but its input can be chunked.

+ *

+ * Each added chunk is handed to a [_JsonStringParser]. The decoded object is

+ * added to the output sink only when [close] is called.

+ */

+class _JsonStringDecoderSink extends StringConversionSinkBase {

+ _ChunkedJsonParser _parser;

+ Function _reviver;

+ final Sink<Object> _sink;

+ _JsonStringDecoderSink(reviver, this._sink)

+ : _reviver = reviver, _parser = _createParser(reviver);

+ static _ChunkedJsonParser _createParser(reviver) { // Builds a string parser whose listener applies [reviver] when one is given.

+ _BuildJsonListener listener;

+ if (reviver == null) {

+ listener = new _BuildJsonListener();

+ } else {

+ listener = new _ReviverJsonListener(reviver);

+ }

+ return new _JsonStringParser(listener);

+ }

+ void addSlice(String chunk, int start, int end, bool isLast) { // Parses [chunk] from [start] up to [end].

+ _parser.chunk = chunk;

+ _parser.chunkEnd = end;

+ _parser.parse(start);

+ if (isLast) _parser.close(); // Closes only the parser; the result is emitted by close().

+ }

+ void add(String chunk) {

+ addSlice(chunk, 0, chunk.length, false);

+ }

+ void close() { // Closes the parser, emits the decoded object and closes the output sink.

+ _parser.close();

+ var decoded = _parser.result;

+ _sink.add(decoded);

+ _sink.close();

+ }

+ Utf8ConversionSink asUtf8Sink(bool allowMalformed) { // Switches to a UTF-8 based sink with the same reviver and output sink.

+ _parser = null; // The string parser is dropped.

+ return new _JsonUtf8DecoderSink(_reviver, _sink, allowMalformed);

+ }

+class _Utf8StringBuffer {

+ static const int INITIAL_CAPACITY = 32;

+ // Partial state encoding.

+ static const int MASK_TWO_BIT = 0x03;

+ static const int MASK_SIZE = MASK_TWO_BIT;

+ static const int SHIFT_MISSING = 2;

+ static const int SHIFT_VALUE = 4;

+ static const int NO_PARTIAL = 0;

+ // UTF-8 encoding and limits.

+ static const int MAX_ASCII = 127;

+ static const int MAX_TWO_BYTE = 0x7ff;

+ static const int MAX_THREE_BYTE = 0xffff;

+ static const int MAX_UNICODE = 0X10ffff;

+ static const int MASK_TWO_BYTE = 0x1f;

+ static const int MASK_THREE_BYTE = 0x0f;

+ static const int MASK_FOUR_BYTE = 0x07;

+ static const int MASK_CONTINUE_TAG = 0xC0;

+ static const int MASK_CONTINUE_VALUE = 0x3f;

+ static const int CONTINUE_TAG = 0x80;

+ // UTF-16 surrogate encoding.

+ static const int LEAD_SURROGATE = 0xD800;

+ static const int TAIL_SURROGATE = 0xDC00;

+ static const int SHIFT_HIGH_SURROGATE = 10;

+ static const int MASK_LOW_SURROGATE = 0x3ff;

Søren Gjesse 2014/10/24 11:12:24 Comment that buffer starts as Uint8, but might cha

Lasse Reichstein Nielsen 2014/10/27 12:42:32 Comment added. We convert all non-BMP characters t

+ List<int> buffer = new Uint8List(INITIAL_CAPACITY);

+ int length = 0;

+ int partialState = NO_PARTIAL;

+ bool isLatin1 = true;

+ // If allowing malformed, invalid UTF-8 sequences are converted to

+ // U+FFFD.

+ bool allowMalformed;

+ _Utf8StringBuffer(this.allowMalformed);

+ /**

+ * Parse the continuation of a multi-byte UTF-8 sequence.

+ *

+ * Parse [utf8] from [position] to [end]. If the sequence extends beyond

+ * `end`, store the partial state in [partialState], and continue from there

+ * on the next added slice.

+ *

+ * The [size] is the number of expected continuation bytes total,

+ * and [missing] is the number of remaining continuation bytes.

+ * The [size] is used to detect overlong encodings.

+ * The [value] is the value collected so far.

+ *

+ * When called after seeing the first multi-byte marker, the [size] and

+ * [missing] values are always the same, but they may differ if continuing

+ * after a partial sequence.

+ *

+ * Returns the position to continue from: [end] if the sequence is still

+ * incomplete, the position of the offending byte if a non-continuation

+ * byte was replaced by U+FFFD, and otherwise the position just past the

+ * completed sequence.

+ *

+ * Throws a [FormatException] for a non-continuation byte or an over-long

+ * encoding unless [allowMalformed] is set.

+ */

+ int addContinuation(List<int> utf8, int position, int end,

+ int size, int missing, int value) {

+ int codeEnd = position + missing; // Position just past the last expected continuation byte.

+ do {

+ if (position == end) {

+ missing = codeEnd - position;

+ partialState =

+ size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE);

+ return end;

+ }

+ int char = utf8[position];

+ if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) {

+ if (allowMalformed) {

+ addCharCode(0xFFFD);

+ return position; // The offending byte is not consumed.

+ }

+ throw new FormatException("Expected UTF-8 continuation byte, "

+ "found $char", utf8, position);

+ }

+ value = 64 * value + (char & MASK_CONTINUE_VALUE); // Each continuation byte contributes six bits.

+ position++;

+ } while (position < codeEnd);

+ if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) {

+ // Over-long encoding: the value does not exceed the limit looked up for [size].

+ if (allowMalformed) {

+ value = 0xFFFD;

+ } else {

+ throw new FormatException(

+ "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}"

+ " encoded in ${size + 1} bytes.", utf8, position - 1);

+ }

+ addCharCode(value);

return position;

}

- static double _parseDouble(String source, int start, int end)

- native "Double_parse";

+ /// Appends the code point [char] to the buffer.

+ ///

+ /// Code points up to `MAX_THREE_BYTE` are stored as a single element; larger

+ /// ones are stored as a lead/tail surrogate pair. The buffer is converted to

+ /// 16-bit storage the first time a value above 0xff is added.

+ ///

+ /// If a multi-byte UTF-8 sequence is still pending in [partialState], it is

+ /// replaced by U+FFFD when [allowMalformed] is set; otherwise a

+ /// [FormatException] is thrown. The bytes of the pending sequence are not

+ /// available here, so the exception carries only the message.

+ void addCharCode(int char) {

+ assert(char >= 0);

+ assert(char <= MAX_UNICODE);

+ if (partialState != NO_PARTIAL) {

+ if (allowMalformed) {

+ // Clear the state first so the recursive call does not re-enter here.

+ partialState = NO_PARTIAL;

+ addCharCode(0xFFFD);

+ } else {

+ throw new FormatException("Incomplete UTF-8 sequence");

+ }

+ }

+ if (isLatin1 && char > 0xff) {

+ _to16Bit(); // Also grows a little if close to full.

+ }

+ int length = this.length;

+ if (char <= MAX_THREE_BYTE) {

+ if (length == buffer.length) _grow();

+ buffer[length] = char;

+ this.length = length + 1;

+ return;

+ }

+ // Above the single-unit range: store as a surrogate pair.

+ if (length + 2 > buffer.length) _grow();

+ int bits = char - 0x10000;

+ buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE);

+ buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE);

+ this.length = length + 2;

+ }

+ void _to16Bit() { // Replaces the 8-bit buffer with a Uint16List holding the same contents.

+ assert(isLatin1);

+ int newCapacity = buffer.length;

+ if (newCapacity - length < INITIAL_CAPACITY) { // Fewer than INITIAL_CAPACITY free slots remain.

+ newCapacity = length + INITIAL_CAPACITY;

+ }

+ Uint16List newBuffer = new Uint16List(newCapacity);

+ newBuffer.setRange(0, length, buffer, 0);

+ buffer = newBuffer;

+ isLatin1 = false;

+ }

+ void _grow() { // Doubles the buffer capacity, keeping the current element width.

+ int newCapacity = buffer.length * 2;

+ List newBuffer;

+ if (isLatin1) {

+ newBuffer = new Uint8List(newCapacity);

+ } else {

+ newBuffer = new Uint16List(newCapacity);

+ }

+ newBuffer.setRange(0, length, buffer); // Only the first [length] elements are carried over.

+ buffer = newBuffer;

+ }

+ /// Decodes the UTF-8 bytes of [utf8] from [position] to [end] and appends

+ /// the result to the buffer.

+ ///

+ /// A multi-byte sequence left unfinished by an earlier slice (recorded in

+ /// [partialState]) is continued first. A sequence that is cut off at [end]

+ /// is recorded in [partialState] by [addContinuation].

+ ///

+ /// Throws a [FormatException] on an unexpected continuation byte or an

+ /// invalid lead byte unless [allowMalformed] is set, in which case U+FFFD

+ /// is appended instead.

+ void addSlice(List<int> utf8, int position, int end) {

+ assert(position < end);

+ if (partialState > 0) {

+ int continueByteCount = (partialState & MASK_TWO_BIT);

+ int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;

+ int value = partialState >> SHIFT_VALUE;

+ partialState = NO_PARTIAL;

+ position = addContinuation(utf8, position, end,

+ continueByteCount, missing, value);

+ if (position == end) return;

+ }

+ // ASCII bytes are written through local copies of the length and capacity.

+ int index = length;

+ int capacity = buffer.length;

+ while (position < end) {

+ int char = utf8[position];

+ if (char <= MAX_ASCII) {

+ if (index == capacity) {

+ // _grow copies only the first [length] elements, so publish the

+ // local index first, then pick up the new capacity.

+ length = index;

+ _grow();

+ capacity = buffer.length;

+ }

+ buffer[index++] = char;

+ position++;

+ continue;

+ }

+ // The helpers below work on the [length] field, so synchronize it.

+ length = index;

+ if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) {

+ if (allowMalformed) {

+ addCharCode(0xFFFD);

+ position++;

+ } else {

+ throw new FormatException("Unexpected UTF-8 continuation byte",

+ utf8, position);

+ }

+ } else if (char < 0xE0) { // C0-DF

+ // Two-byte.

+ position = addContinuation(utf8, position + 1, end, 1, 1,

+ char & MASK_TWO_BYTE);

+ } else if (char < 0xF0) { // E0-EF

+ // Three-byte.

+ position = addContinuation(utf8, position + 1, end, 2, 2,

+ char & MASK_THREE_BYTE);

+ } else if (char < 0xF8) { // F0-F7

+ // Four-byte.

+ position = addContinuation(utf8, position + 1, end, 3, 3,

+ char & MASK_FOUR_BYTE);

+ } else {

+ if (allowMalformed) {

+ addCharCode(0xFFFD);

+ position++;

+ } else {

+ throw new FormatException("Invalid UTF-8 byte: $char",

+ utf8, position);

+ }

+ }

+ // The helpers may have appended, grown or replaced the buffer.

+ index = length;

+ capacity = buffer.length;

+ }

+ length = index;

+ }

+ String toString() { // Returns the buffered characters as a String; a still-pending UTF-8 sequence is replaced by U+FFFD or reported as an error.

+ if (partialState != NO_PARTIAL) {

+ if (allowMalformed) {

+ partialState = NO_PARTIAL;

+ addCharCode(0xFFFD);

+ } else {

+ int continueByteCount = (partialState & MASK_TWO_BIT);

+ int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;

+ int value = partialState >> SHIFT_VALUE;

+ int seenByteCount = continueByteCount - missing + 1; // Lead byte plus the continuation bytes already seen.

+ List source = new Uint8List(seenByteCount); // Rebuilt bytes, used only as the source of the exception.

+ while (seenByteCount > 1) {

+ seenByteCount--;

+ source[seenByteCount] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE);

+ value >>= 6;

+ }

+ source[0] = value | (0x3c0 >> (continueByteCount - 1)); // Remaining bits combined with a marker derived from the continuation count.

+ throw new FormatException("Incomplete UTF-8 sequence",

+ source, source.length);

+ }

+ return new String.fromCharCodes(buffer, 0, length);

+ }

+/**

+ * Chunked JSON parser that parses UTF-8 chunks.

+ *

+ * The current chunk is held in [chunk] as a list of bytes and [chunkEnd] is

+ * the end position within it. String values are decoded through a

+ * [_Utf8StringBuffer], which receives the [allowMalformed] setting.

+ */

+class _JsonUtf8Parser extends _ChunkedJsonParser {

+ final bool allowMalformed;

+ List<int> chunk;

+ int chunkEnd;

+ _JsonUtf8Parser(_JsonListener listener, this.allowMalformed)

+ : super(listener);

+ /// Returns the byte at [position] of the current chunk.

+ int getChar(int position) => chunk[position];

+ /// Decodes the bytes from [start] to [end] of the current chunk as a string.

+ ///

+ /// Uses the shared string buffer, so any string being accumulated is

+ /// replaced.

+ String getString(int start, int end) {

+ beginString();

+ addSliceToString(start, end);

+ String result = endString();

+ return result;

+ }

+ /// Starts a fresh string accumulation.

+ void beginString() {

+ this.buffer = new _Utf8StringBuffer(allowMalformed);

+ }

+ /// Decodes the bytes from [start] to [end] into the accumulated string.

+ void addSliceToString(int start, int end) {

+ _Utf8StringBuffer buffer = this.buffer;

+ buffer.addSlice(chunk, start, end);

+ }

+ /// Appends a single character code to the accumulated string.

+ void addCharToString(int charCode) {

+ _Utf8StringBuffer buffer = this.buffer;

+ buffer.addCharCode(charCode);

+ }

+ /// Returns the accumulated string and clears the buffer field.

+ String endString() {

+ _Utf8StringBuffer buffer = this.buffer;

+ this.buffer = null;

+ return buffer.toString();

+ }

+ /// Copies the bytes from [start] to [end] into [target] starting at

+ /// [offset].

+ void copyCharsToList(int start, int end, List target, int offset) {

+ int length = end - start;

+ target.setRange(offset, offset + length, chunk, start);

+ }

+ /// Parses the bytes from [start] to [end] as a double.

+ ///

+ /// The native parser takes a [String], so the bytes are decoded first.

+ double parseDouble(int start, int end) {

+ String string = getString(start, end);

+ return _parseDouble(string, 0, string.length);

+ }

+}

+double _parseDouble(String source, int start, int end)

+ native "Double_parse"; // Bound to the native entry "Double_parse"; presumably parses source from start to end — confirm against the VM implementation.

+/**

+ * Implements the chunked conversion from a UTF-8 encoding of JSON

+ * to its corresponding object.

+ *

+ * Each added chunk of bytes is handed to a [_JsonUtf8Parser]. The decoded

+ * object is added to the output sink only when [close] is called.

+ */

+class _JsonUtf8DecoderSink extends ByteConversionSinkBase {

+ _JsonUtf8Parser _parser;

+ final Sink<Object> _sink;

+ _JsonUtf8DecoderSink(reviver, this._sink, bool allowMalformed)

+ : _parser = _createParser(reviver, allowMalformed);

+ /// Builds a UTF-8 parser whose listener applies [reviver] when one is given.

+ static _JsonUtf8Parser _createParser(reviver, bool allowMalformed) {

+ _BuildJsonListener listener;

+ if (reviver == null) {

+ listener = new _BuildJsonListener();

+ } else {

+ listener = new _ReviverJsonListener(reviver);

+ }

+ return new _JsonUtf8Parser(listener, allowMalformed);

+ }

+ /// Parses the bytes of [chunk] from [start] up to [end].

+ ///

+ /// When [isLast] is set only the parser is closed; the result is emitted

+ /// by [close].

+ void addSlice(List<int> chunk, int start, int end, bool isLast) {

+ _parser.chunk = chunk;

+ _parser.chunkEnd = end;

+ _parser.parse(start);

+ if (isLast) _parser.close();

+ }

+ /// Parses all of [chunk].

+ void add(List<int> chunk) {

+ addSlice(chunk, 0, chunk.length, false);

+ }

- void fail(int position, [String message]) {

- if (message == null) message = "Unexpected character";

- throw new FormatException(message, source, position);

+ /// Closes the parser, emits the decoded object and closes the output sink.

+ void close() {

+ _parser.close();

+ var decoded = _parser.result;

+ _sink.add(decoded);

+ _sink.close();

}

}

« no previous file with comments | « no previous file | runtime/lib/double_patch.dart » ('j') | tests/lib/convert/json_chunk_test.dart » ('J')