Chromium Code Reviews| Index: runtime/lib/convert_patch.dart |
| diff --git a/runtime/lib/convert_patch.dart b/runtime/lib/convert_patch.dart |
| index 2a4ab1bc8af658b484fc94e8726a2c2afc8f8627..e00db53811bc1d667bbe37b1b710506c086b3c77 100644 |
| --- a/runtime/lib/convert_patch.dart |
| +++ b/runtime/lib/convert_patch.dart |
| @@ -1,7 +1,9 @@ |
| -// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| +import "dart:_internal" show POWERS_OF_TEN; |
| + |
| // JSON conversion. |
| patch _parseJson(String json, reviver(var key, var value)) { |
| @@ -11,7 +13,11 @@ patch _parseJson(String json, reviver(var key, var value)) { |
| } else { |
| listener = new _ReviverJsonListener(reviver); |
| } |
| - new _JsonParser(json, listener).parse(); |
| + var parser = new _JsonStringParser(listener); |
| + parser.chunk = json; |
| + parser.chunkEnd = json.length; |
| + parser.parse(0); |
| + parser.close(); |
| return listener.result; |
| } |
| @@ -19,6 +25,9 @@ patch _parseJson(String json, reviver(var key, var value)) { |
| // Simple API for JSON parsing. |
| +/** |
| + * Listener for parsing events from [_ChunkedJsonParser]. |
| + */ |
| abstract class _JsonListener { |
| void handleString(String value) {} |
| void handleNumber(num value) {} |
| @@ -34,7 +43,7 @@ abstract class _JsonListener { |
| } |
| /** |
| - * A [JsonListener] that builds data objects from the parser events. |
| + * A [_JsonListener] that builds data objects from the parser events. |
| * |
| * This is a simple stack-based object builder. It keeps the most recently |
| * seen value in a variable, and uses it depending on the following event. |
| @@ -135,7 +144,74 @@ class _ReviverJsonListener extends _BuildJsonListener { |
| } |
| } |
| -class _JsonParser { |
| +/** |
| + * Buffer holding parts of a numeral. |
| + * |
| + * The buffer contains the characters of a JSON number. |
| + * These are all ASCII, so an [Uint8List] is used as backing store. |
| + * |
| + * This buffer is used when a JSON number is split between separate chunks. |
| + * |
| + */ |
| +class _NumberBuffer { |
| + static const int kMinCapacity = 16; |
|
floitsch
2014/10/20 08:52:44
constants in Dart don't start with "k".
maybe they
Lasse Reichstein Nielsen
2014/10/27 12:42:32
I know. The style guide changed so they are no lon
|
| + static const int kDefaultOverhead = 5; |
| + Uint8List list; |
| + int length = 0; |
| + _NumberBuffer(int initialCapacity) |
| + : list = new Uint8List(_initialCapacity(initialCapacity)); |
| + |
| + int get capacity => list.length; |
| + |
| + // Pick an initial capacity greater than the first part's size. |
| + // The typical use case has two parts, this is the attempt at |
| + // guessing the size of the second part without overdoing it. |
| + // The default estimate of the second part is [kDefaultOverhead], |
| + // then round up to a multiple of four, and return the result, |
| + // or [kMinCapacity] if that is greater. |
| + static int _initialCapacity(int minCapacity) { |
| + minCapacity += kDefaultOverhead; |
| + if (minCapacity < kMinCapacity) return kMinCapacity; |
| + minCapacity = (minCapacity + 3) & ~3; // Round to multile of four. |
|
floitsch
2014/10/20 08:52:44
multiple
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
|
| + return minCapacity; |
| + } |
| + |
| + // Grows to the exact size asked for. |
| + void ensureCapacity(int newCapcity) { |
|
floitsch
2014/10/20 08:52:44
newCapacity
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
|
| + Uint8List list = this.list; |
| + if (newCapcity <= list.length) return; |
| + Uint8List newList = new Uint8List(newCapcity); |
| + newList.setRange(0, list.length, list, 0); |
| + this.list = newList; |
| + } |
| + |
| + String toString() => "NumberBuffer"; |
|
floitsch
2014/10/20 08:52:44
maybe add the contents?
=> "NumberBuffer(${getStr
Lasse Reichstein Nielsen
2014/10/27 12:42:32
I think I had that for debugging, but I'll just re
|
| + |
| + /// Returns the first [length] bytes of the backing [list] as a string. |
| + String getString() { |
| +   Uint8List bytes = this.list; |
| +   // Only wrap the storage in a view when it is not completely filled. |
| +   if (bytes.length > length) { |
| +     bytes = new Uint8List.view(bytes.buffer, 0, length); |
| +   } |
| +   return new String.fromCharCodes(bytes); |
| + } |
| + |
| + // TODO(lrn): See if parsing of numbers can be abstracted to something |
| + // not only working on strings, but also on char-code lists, without losing |
| + // performance. |
| + // Parses the buffered characters (via getString) as an integer. |
| + int parseInt() => int.parse(getString()); |
| + // Parses the buffered characters (via getString) as a double. |
| + double parseDouble() => double.parse(getString()); |
| +} |
| + |
| +/** |
| + * Chunked JSON parser. |
| + * |
| + * Receives inputs in chunks, gives access to individual parts of the input, |
| + * and stores input state between chunks. |
| + * |
| + * Implementations include [String] and UTF-8 parsers. |
| + */ |
| +abstract class _ChunkedJsonParser { |
| // A simple non-recursive state-based parser for JSON. |
| // |
| // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON |
| @@ -172,11 +248,11 @@ class _JsonParser { |
| static const int NO_VALUES = 12; |
| // Objects and arrays are "empty" until their first property/element. |
| + // At this position, they may either have an entry or a close-bracket. |
| static const int EMPTY = 0; |
| static const int NON_EMPTY = 16; |
| static const int EMPTY_MASK = 16; // Empty if zero. |
| - |
| static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY; |
| // Actual states. |
| @@ -226,18 +302,394 @@ class _JsonParser { |
| static const int LBRACE = 0x7b; |
| static const int RBRACE = 0x7d; |
| - final String source; |
| + // State of partial value at chunk split. |
| + static const int NO_PARTIAL = 0; |
| + static const int PARTIAL_STRING = 1; |
| + static const int PARTIAL_NUMERAL = 2; |
| + static const int PARTIAL_KEYWORD = 3; |
| + static const int MASK_PARTIAL = 3; |
| + |
| + // Partial states for numerals. Values can be |'ed with PARTIAL_NUMERAL. |
| + static const int NUM_SIGN = 0; // After initial '-'. |
| + static const int NUM_ZERO = 4; // After '0' as first digit. |
| + static const int NUM_DIGIT = 8; // After digit, no '.' or 'e' seen. |
| + static const int NUM_DOT = 12; // After '.'. |
| + static const int NUM_DOT_DIGIT = 16; // After a decimal digit (after '.'). |
| + static const int NUM_E = 20; // After 'e' or 'E'. |
| + static const int NUM_E_SIGN = 24; // After '-' or '+' after 'e' or 'E'. |
| + static const int NUM_E_DIGIT = 28; // After exponent digit. |
| + static const int NUM_SUCCESS = 32; // Never stored as partial state. |
| + |
| + // Partial states for strings. |
| + static const int STR_PLAIN = 0; // Inside string, but not escape. |
| + static const int STR_ESCAPE = 4; // After '\'. |
| + static const int STR_U = 16; // After '\u' and 0-3 hex digits. |
| + static const int STR_U_COUNT_SHIFT = 2; // Hex digit count in bits 2-3. |
| + static const int STR_U_VALUE_SHIFT = 5; // Hex digit value in bits 5+. |
| + |
| + // Partial states for keywords. |
| + static const int KWD_TYPE_MASK = 12; |
| + static const int KWD_TYPE_SHIFT = 2; |
| + static const int KWD_NULL = 0; // Prefix of "null" seen. |
| + static const int KWD_TRUE = 4; // Prefix of "true" seen. |
| + static const int KWD_FALSE = 8; // Prefix of "false" seen. |
| + static const int KWD_COUNT_SHIFT = 4; // Prefix length in bits 4+. |
| + |
| + // Mask used to mask off two lower bits. |
| + static const int TWO_BIT_MASK = 3; |
| + |
| final _JsonListener listener; |
| - _JsonParser(this.source, this.listener); |
| + |
| + // The current parsing state. |
| + int state = STATE_INITIAL; |
| + List<int> states = <int>[]; |
| + |
| + /** |
| + * Stores tokenizer state between chunks. |
| + * |
| + * This state is stored when a chunk stops in the middle of a |
| + * token (string, numeral, boolean or null). |
| + * |
| + * The partial state is used to continue parsing on the next chunk. |
| + * The previous chunk is not retained, any data needed are stored in |
| + * this integer, or in the [buffer] field as a string-building buffer |
| + * or a [_NumberBuffer]. |
| + * |
| + * Partial state is stored in [partialState] as bits. |
| + * |
| + * ..00 : No partial value (NO_PARTIAL). |
| + * |
| + * ..00001 : Partial string, not inside escape. |
| + * ..00101 : Partial string, after '\'. |
| + * ..vvvv1dd01 : Partial \u escape. |
| + * The 'dd' bits (2-3) encode the number of hex digits seen. |
| + * Bits 5-16 encode the value of the hex digits seen so far. |
| + * |
| + * ..0ddd10 : Partial numeral. |
| + * The `ddd` bits store which parts of the numeral have been seen |
| + * so far, as one of the `NUM_*` constants defined above. |
| + * The characters of the numeral are stored in [buffer] |
| + * as a [_NumberBuffer]. |
| + * |
| + * ..0ddd0011 : Partial 'null' keyword. |
| + * ..0ddd0111 : Partial 'true' keyword. |
| + * ..0ddd1011 : Partial 'false' keyword. |
| + * For all three keywords, the `ddd` bits encode the number |
| + * of letters seen. |
| + */ |
| + int partialState = NO_PARTIAL; |
| + |
| + /** |
| + * Extra data stored while parsing a primitive value. |
| + * May be set during parsing, always set at chunk end if a value is partial. |
| + * |
| + * May contain a string buffer while parsing strings. |
| + */ |
| + var buffer = null; |
| + |
| + _ChunkedJsonParser(this.listener); |
| + |
| + /// Pushes the given parse [state] onto the [states] stack. |
| + /// |
| + /// Called when a new array or object literal starts, so that the parser |
| + /// can return to the enclosing state once that literal ends. |
| + void saveState(int state) => states.add(state); |
| + |
| + /// Pops and returns the state most recently pushed with [saveState]. |
| + int restoreState() => states.removeLast(); // Throws if empty. |
| + |
| + /** |
| + * Finalizes the parsing. |
| + * |
| + * If the source ends in a number, it will be completed. Any other partial |
| + * state is an error. |
|
Søren Gjesse
2014/10/24 11:12:24
And the states stack is empty, right?
Lasse Reichstein Nielsen
2014/10/27 12:42:33
That's what the next paragraph tries to say. I'll
|
| + * |
| + * Throws if the source read so far doesn't end up with a complete |
| + * parsed value. |
| + */ |
| + void close() { |
| + if (partialState != NO_PARTIAL) { |
| + int partialType = partialState & MASK_PARTIAL; |
| + if (partialType == PARTIAL_NUMERAL) { |
| + int numState = partialState & ~MASK_PARTIAL; |
| + // A partial number might be a valid number if we know it's done. |
| + // There is an unnecessary overhead if input is a single number, |
| + // but this is assumed to be rare. |
| + _NumberBuffer buffer = this.buffer; |
| + this.buffer = null; |
| + finishChunkNumber(numState, 0, 0, buffer); |
| + } else if (partialType == PARTIAL_STRING) { |
| + fail(chunkEnd, "Unterminate string"); |
|
Søren Gjesse
2014/10/24 11:12:24
Unterminated
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
|
| + } else { |
| + assert(partialType == PARTIAL_KEYWORD); |
| + fail(chunkEnd); // Incomplete literal. |
| + } |
| + } |
| + if (state != STATE_END) { |
| + fail(chunkEnd); |
| + } |
| + } |
| + |
| + /// The value built by the [listener]. |
| + /// |
| + /// Intended to be read after the parser has been successfully closed, |
| + /// either by calling [close] or by calling [addSourceChunk] with `true` |
| + /// as its second (`isLast`) argument. |
| + Object get result => listener.result; |
| + |
| + // Sets the current source chunk. |
|
floitsch
2014/10/20 08:52:44
Make all these comments dartdocs.
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
|
| + void set chunk(var source); |
| + |
| + // Length of current chunk. |
| + int get chunkEnd; |
| + |
| + // Returns the chunk itself. Used by fail to include it in FormatException. |
|
Søren Gjesse
2014/10/24 11:12:23
So the FormatException only have the chunk as the
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Yes, that's all we have.
The FormatException will
|
| + get chunk; |
| + |
| + // Get character/code unit of current chunk. |
| + int getChar(int index); |
| + |
| + // Copy ASCII characters from start to end of chunk into a list. |
| + // Used for number buffer (always copies ASCII, so encoding is not important). |
| + void copyCharsToList(int start, int end, List<int> target); |
| + |
| + // Build a string using input code units. Creates a string buffer |
| + // and enables adding characters and slices to that buffer. |
| + // The buffer is stored in [buffer]. If the string is unterminated, |
| + // the same buffer is used to continue parsing in the next chunk. |
| + void beginString(); |
| + // Add single character code to string being built. |
| + void addCharToString(int charCode); |
| + // Adds the slice from [start] (inclusive) to [end] (exclusive) of the |
| + // current chunk to the string being built. |
|
floitsch
2014/10/20 08:52:44
end exclusive?
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Acknowledged.
|
| + void addSliceToString(int start, int end); |
| + // Finalizes the string being built and returns it as a String. |
| + String endString(); |
| + |
| + // Extracts a literal string from a source slice. |
|
Søren Gjesse
2014/10/24 11:12:23
source slice -> chunk slice
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
|
| + // No interpretation of the content is performed, except for converting |
| + // the source format to string. |
| + // This can be implemented more or less efficiently depending on the |
| + // underlying source. |
| + String getString(int start, int end); |
| + |
| + // Parse a slice of input as an integer. |
|
Søren Gjesse
2014/10/24 11:12:24
slice of input -> chunk slice
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
|
| + // The slice is expected to already hold a correctly formatted integer. |
| + int parseInt(int start, int end) { |
| +   String digits = getString(start, end); |
| +   return int.parse(digits); |
| + } |
| + |
| + // Parse a slice of input as a double. |
|
Søren Gjesse
2014/10/24 11:12:24
ditto.
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
|
| + // The slice is expected to already hold a correctly formatted double. |
| + double parseDouble(int start, int end) { |
| +   String numeral = getString(start, end); |
| +   return double.parse(numeral); |
| + } |
| + |
| + // Creates a _NumberBuffer containing the characters from [start] to |
| + // [chunkEnd] of the current chunk, and returns it. |
| + // |
| + // [start] must be a valid index into the current chunk (asserted). |
| + // The return type was previously declared `void` although the method |
| + // returns the buffer it builds. |
| + _NumberBuffer createNumberBuffer(int start) { |
| +   assert(start >= 0); |
| +   assert(start < chunkEnd); |
| +   int length = chunkEnd - start; |
| +   var buffer = new _NumberBuffer(length); |
| +   // NOTE(review): the other number-buffer copies pass a fourth argument |
| +   // (a target offset) to copyCharsToList; confirm which arity the |
| +   // implementations take and align this call if needed. |
| +   copyCharsToList(start, chunkEnd, buffer.list); |
| +   // The buffer's logical length is tracked separately from its capacity. |
| +   buffer.length = length; |
| +   return buffer; |
| + } |
| + |
| + /// Resumes the token that was left unfinished when the previous chunk ended. |
| + /// |
| + /// Returns [position] unchanged if the current chunk is already exhausted, |
| + /// otherwise the position returned by the string, numeral or keyword |
| + /// continuation that matches the stored partial type. |
| + int parsePartial(int position) { |
| +   if (position == chunkEnd) return position; |
| +   int saved = this.partialState; |
| +   assert(saved != NO_PARTIAL); |
| +   // Clear the stored state before resuming; the continuation stores a new |
| +   // partial state itself if this chunk also ends inside the token. |
| +   this.partialState = NO_PARTIAL; |
| +   int kind = saved & MASK_PARTIAL; |
| +   int detail = saved & ~MASK_PARTIAL; |
| +   assert(kind != 0); |
| +   switch (kind) { |
| +     case PARTIAL_STRING: |
| +       return parsePartialString(position, detail); |
| +     case PARTIAL_NUMERAL: |
| +       return parsePartialNumber(position, detail); |
| +     case PARTIAL_KEYWORD: |
| +       return parsePartialKeyword(position, detail); |
| +   } |
| +   return position; |
| + } |
| + |
| + // Parses the remainder of a number into the number buffer, |
| + // checking syntax as it goes. |
| + // Starts at [position] in the current chunk, and returns the index of the first |
|
Søren Gjesse
2014/10/24 11:12:24
chunk index 0 -> current chunk index?
Lasse Reichstein Nielsen
2014/10/27 12:42:33
At [position] actually.
|
| + // non-digit character found, or chunkEnd if the entire chunk is |
| + // used. |
| + // Throws if a syntax error is detected. |
| + int parsePartialNumber(int position, int state) { |
| + int start = position; |
| + // Primitive implementation, can be optimized. |
| + _NumberBuffer buffer = this.buffer; |
| + this.buffer = null; |
| + int end = chunkEnd; |
| + toBailout: { |
| + if (position == end) break toBailout; |
| + int char = getChar(position); |
| + int digit = char ^ CHAR_0; |
| + if (state == NUM_SIGN) { |
| + if (digit <= 9) { |
| + if (digit == 0) { |
| + state = NUM_ZERO; |
| + } else { |
| + state = NUM_DIGIT; |
| + } |
| + position++; |
| + if (position == end) break toBailout; |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + } else { |
| + return fail(position); |
| + } |
| + } |
| + if (state == NUM_ZERO) { |
| + if (digit <= 9) return fail(position); |
|
floitsch
2014/10/20 08:52:44
Add comment, why this is not allowed.
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
|
| + state = NUM_DIGIT; |
| + } |
| + while (state == NUM_DIGIT) { |
| + if (digit > 9) { |
| + if (char == DECIMALPOINT) { |
| + state = NUM_DOT; |
| + } else if ((char | 0x20) == CHAR_e) { |
| + state = NUM_E; |
| + } else { |
| + finishChunkNumber(state, start, position, buffer); |
| + return position; |
| + } |
| + } |
| + position++; |
| + if (position == end) break toBailout; |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + } |
| + if (state == NUM_DOT) { |
| + if (digit > 9) return fail(position); |
| + state = NUM_DOT_DIGIT; |
| + } |
| + while (state == NUM_DOT_DIGIT) { |
| + if (digit > 9) { |
| + if ((char | 0x20) == CHAR_e) { |
| + state = NUM_E; |
| + } else { |
| + finishChunkNumber(state, start, position, buffer); |
| + return position; |
| + } |
| + } |
| + position++; |
| + if (position == end) break toBailout; |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + } |
| + if (state == NUM_E) { |
| + if (char == PLUS || char == MINUS) { |
| + state = NUM_E_SIGN; |
| + position++; |
| + if (position == end) break toBailout; |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + } |
| + } |
| + assert(state >= NUM_E); |
| + while (digit <= 9) { |
| + state = NUM_E_DIGIT; |
| + position++; |
| + if (position == end) break toBailout; |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + } |
| + finishChunkNumber(state, start, position, buffer); |
| + return position; |
| + } |
| + // Bailout code in case the current chunk ends while parsing the numeral. |
| + assert(position == end); |
| + continueChunkNumber(state, start, buffer); |
| + return chunkEnd; |
| + } |
| + |
| + // Resumes a string that was cut off at the end of the previous chunk. |
| + // |
| + // [partialState] is the stored state with the partial-type bits removed: |
| + // STR_PLAIN (inside the string), STR_ESCAPE (right after a backslash), or |
| + // a STR_U state carrying the count and value of the hex digits read so far. |
| + // Returns the position after the parsed part, as reported by the string |
| + // helpers it delegates to. |
| + int parsePartialString(int position, int partialState) { |
| +   if (partialState == STR_PLAIN) { |
| +     return parseStringToBuffer(position); |
| +   } |
| +   if (partialState == STR_ESCAPE) { |
| +     position = parseStringEscape(position); |
| +     // parseStringEscape sets partialState if it sees the end. |
| +     if (position == chunkEnd) return position; |
| +     return parseStringToBuffer(position); |
| +   } |
| +   assert((partialState & STR_U) != 0); |
| +   // Unpack the hex-digit value and count stored by chunkStringEscapeU. |
| +   int value = partialState >> STR_U_VALUE_SHIFT; |
| +   int count = (partialState >> STR_U_COUNT_SHIFT) & TWO_BIT_MASK; |
| +   for (int i = count; i < 4; i++, position++) { |
| +     if (position == chunkEnd) return chunkStringEscapeU(i, value); |
| +     int char = getChar(position); |
| +     int digit = parseHexDigit(char); |
| +     // Return the result of fail, like the other call sites, so a negative |
| +     // digit can never be folded into value. |
| +     if (digit < 0) return fail(position, "Invalid hex digit"); |
| +     value = 16 * value + digit; |
| +   } |
| +   addCharToString(value); |
| +   return parseStringToBuffer(position); |
| + } |
| + |
| + // Resumes a `null`, `true` or `false` keyword split across chunks. |
| + // |
| + // [partialState] carries the keyword type and the number of letters that |
| + // were already matched. Reports the value to the listener once the keyword |
| + // is complete, or stores a new partial state if this chunk ends first. |
| + int parsePartialKeyword(int position, int partialState) { |
| +   int type = partialState & KWD_TYPE_MASK; |
| +   int matched = partialState >> KWD_COUNT_SHIFT; |
| +   String keyword = const ["null", "true", "false"][type >> KWD_TYPE_SHIFT]; |
| +   assert(matched < keyword.length); |
| +   while (true) { |
| +     if (position == chunkEnd) { |
| +       // Still incomplete: remember how far the match has come. |
| +       this.partialState = |
| +           PARTIAL_KEYWORD | type | (matched << KWD_COUNT_SHIFT); |
| +       return chunkEnd; |
| +     } |
| +     int expected = keyword.codeUnitAt(matched); |
| +     int actual = getChar(position); |
| +     if (actual != expected) return fail(position); |
| +     position++; |
| +     matched++; |
| +     if (matched >= keyword.length) break; |
| +   } |
| +   if (type == KWD_NULL) { |
| +     listener.handleNull(); |
| +   } else { |
| +     listener.handleBool(type == KWD_TRUE); |
| +   } |
| +   return position; |
| + } |
| + |
| + // Returns the value of the hexadecimal digit [char], or -1 if [char] is |
| + // not one of '0'-'9', 'a'-'f' or 'A'-'F'. |
| + int parseHexDigit(int char) { |
| +   int decimal = char ^ 0x30; |
| +   if (decimal > 9) { |
| +     // Fold to lower case; 'a' through 'f' then map to 1 through 6. |
| +     int alpha = (char | 0x20) ^ 0x60; |
| +     return (alpha >= 1 && alpha <= 6) ? alpha + 9 : -1; |
| +   } |
| +   return decimal; |
| + } |
| /** Parses the current chunk starting at [position], or throws if it fails. */ |
| - void parse() { |
| - final List<int> states = <int>[]; |
| - int state = STATE_INITIAL; |
| - int position = 0; |
| - int length = source.length; |
| + void parse(int position) { |
| + int length = chunkEnd; |
| + if (partialState != NO_PARTIAL) { |
| + position = parsePartial(position); |
| + if (position == length) return; |
| + } |
| + int state = this.state; |
| while (position < length) { |
| - int char = source.codeUnitAt(position); |
| + int char = getChar(position); |
| switch (char) { |
| case SPACE: |
| case CARRIAGE_RETURN: |
| @@ -246,41 +698,41 @@ class _JsonParser { |
| position++; |
| break; |
| case QUOTE: |
| - if ((state & ALLOW_STRING_MASK) != 0) fail(position); |
| - position = parseString(position + 1); |
| + if ((state & ALLOW_STRING_MASK) != 0) return fail(position); |
| state |= VALUE_READ_BITS; |
| + position = parseString(position + 1); |
| break; |
| case LBRACKET: |
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position); |
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| listener.beginArray(); |
| - states.add(state); |
| + saveState(state); |
| state = STATE_ARRAY_EMPTY; |
| position++; |
| break; |
| case LBRACE: |
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position); |
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| listener.beginObject(); |
| - states.add(state); |
| + saveState(state); |
| state = STATE_OBJECT_EMPTY; |
| position++; |
| break; |
| case CHAR_n: |
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position); |
| - position = parseNull(position); |
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| state |= VALUE_READ_BITS; |
| + position = parseNull(position); |
| break; |
| case CHAR_f: |
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position); |
| - position = parseFalse(position); |
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| state |= VALUE_READ_BITS; |
| + position = parseFalse(position); |
| break; |
| case CHAR_t: |
| - if ((state & ALLOW_VALUE_MASK) != 0) fail(position); |
| - position = parseTrue(position); |
| + if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| state |= VALUE_READ_BITS; |
| + position = parseTrue(position); |
| break; |
| case COLON: |
| - if (state != STATE_OBJECT_KEY) fail(position); |
| + if (state != STATE_OBJECT_KEY) return fail(position); |
| listener.propertyName(); |
| state = STATE_OBJECT_COLON; |
| position++; |
| @@ -295,7 +747,7 @@ class _JsonParser { |
| state = STATE_ARRAY_COMMA; |
| position++; |
| } else { |
| - fail(position); |
| + return fail(position); |
| } |
| break; |
| case RBRACKET: |
| @@ -305,9 +757,9 @@ class _JsonParser { |
| listener.arrayElement(); |
| listener.endArray(); |
| } else { |
| - fail(position); |
| + return fail(position); |
| } |
| - state = states.removeLast() | VALUE_READ_BITS; |
| + state = restoreState() | VALUE_READ_BITS; |
| position++; |
| break; |
| case RBRACE: |
| @@ -317,19 +769,19 @@ class _JsonParser { |
| listener.propertyValue(); |
| listener.endObject(); |
| } else { |
| - fail(position); |
| + return fail(position); |
| } |
| - state = states.removeLast() | VALUE_READ_BITS; |
| + state = restoreState() | VALUE_READ_BITS; |
| position++; |
| break; |
| default: |
| if ((state & ALLOW_VALUE_MASK) != 0) fail(position); |
| - position = parseNumber(char, position); |
| state |= VALUE_READ_BITS; |
| + position = parseNumber(char, position); |
| break; |
| } |
| } |
| - if (state != STATE_END) fail(position); |
| + this.state = state; |
| } |
| /** |
| @@ -338,12 +790,14 @@ class _JsonParser { |
| * [:source[position]:] must be "t". |
| */ |
| int parseTrue(int position) { |
| - assert(source.codeUnitAt(position) == CHAR_t); |
| - if (source.length < position + 4) fail(position, "Unexpected identifier"); |
| - if (source.codeUnitAt(position + 1) != CHAR_r || |
| - source.codeUnitAt(position + 2) != CHAR_u || |
| - source.codeUnitAt(position + 3) != CHAR_e) { |
| - fail(position); |
| + assert(getChar(position) == CHAR_t); |
| + if (chunkEnd < position + 4) { |
| + return parseKeywordPrefix(position, "true", KWD_TRUE); |
| + } |
| + if (getChar(position + 1) != CHAR_r || |
| + getChar(position + 2) != CHAR_u || |
| + getChar(position + 3) != CHAR_e) { |
| + return fail(position); |
| } |
| listener.handleBool(true); |
| return position + 4; |
| @@ -355,13 +809,15 @@ class _JsonParser { |
| * [:source[position]:] must be "f". |
| */ |
| int parseFalse(int position) { |
| - assert(source.codeUnitAt(position) == CHAR_f); |
| - if (source.length < position + 5) fail(position, "Unexpected identifier"); |
| - if (source.codeUnitAt(position + 1) != CHAR_a || |
| - source.codeUnitAt(position + 2) != CHAR_l || |
| - source.codeUnitAt(position + 3) != CHAR_s || |
| - source.codeUnitAt(position + 4) != CHAR_e) { |
| - fail(position); |
| + assert(getChar(position) == CHAR_f); |
| + if (chunkEnd < position + 5) { |
| + return parseKeywordPrefix(position, "false", KWD_FALSE); |
| + } |
| + if (getChar(position + 1) != CHAR_a || |
| + getChar(position + 2) != CHAR_l || |
| + getChar(position + 3) != CHAR_s || |
| + getChar(position + 4) != CHAR_e) { |
| + return fail(position); |
| } |
| listener.handleBool(false); |
| return position + 5; |
| @@ -373,17 +829,33 @@ class _JsonParser { |
| * [:source[position]:] must be "n". |
| */ |
| int parseNull(int position) { |
| - assert(source.codeUnitAt(position) == CHAR_n); |
| - if (source.length < position + 4) fail(position, "Unexpected identifier"); |
| - if (source.codeUnitAt(position + 1) != CHAR_u || |
| - source.codeUnitAt(position + 2) != CHAR_l || |
| - source.codeUnitAt(position + 3) != CHAR_l) { |
| - fail(position); |
| + assert(getChar(position) == CHAR_n); |
| + if (chunkEnd < position + 4) { |
| + return parseKeywordPrefix(position, "null", KWD_NULL); |
| + } |
| + if (getChar(position + 1) != CHAR_u || |
| + getChar(position + 2) != CHAR_l || |
| + getChar(position + 3) != CHAR_l) { |
| + return fail(position); |
| } |
| listener.handleNull(); |
| return position + 4; |
| } |
| + // Matches the rest of the current chunk against the keyword [chars], |
| + // whose first character is at [position]. |
| + // |
| + // Fails at the keyword start on a mismatch. Otherwise stores a partial |
| + // keyword state with [type] and the matched letter count, and returns the |
| + // chunk end. |
| + int parseKeywordPrefix(int position, String chars, int type) { |
| +   assert(getChar(position) == chars.codeUnitAt(0)); |
| +   int end = chunkEnd; |
| +   int matched = 1; |
| +   for (int index = position + 1; index < end; index++) { |
| +     if (getChar(index) != chars.codeUnitAt(matched)) return fail(position); |
| +     matched++; |
| +   } |
| +   this.partialState = PARTIAL_KEYWORD | type | (matched << KWD_COUNT_SHIFT); |
| +   return end; |
| + } |
| + |
| /** |
| * Parses a string value. |
| * |
| @@ -394,92 +866,172 @@ class _JsonParser { |
| // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"' |
| // Initial position is right after first '"'. |
| int start = position; |
| - while (position < source.length) { |
| - int char = source.codeUnitAt(position++); |
| + int end = chunkEnd; |
| + while (position < end) { |
| + int char = getChar(position++); |
| // BACKSLASH is larger than QUOTE and SPACE. |
| if (char > BACKSLASH) { |
| continue; |
| } |
| if (char == BACKSLASH) { |
| - return parseStringWithEscapes(start, position - 1); |
| + beginString(); |
| + addSliceToString(start, position - 1); |
| + return parseStringToBuffer(position - 1); |
| } |
| if (char == QUOTE) { |
| - listener.handleString(source.substring(start, position - 1)); |
| + listener.handleString(getString(start, position - 1)); |
| return position; |
| } |
| if (char < SPACE) { |
| fail(position - 1, "Control character in string"); |
| } |
| } |
| - fail(start - 1, "Unterminated string"); |
| + beginString(); |
| + addSliceToString(start, end); |
| + return chunkString(STR_PLAIN); |
| } |
| - int parseStringWithEscapes(start, position) { |
| - // Backslash escape detected. Collect character codes for rest of string. |
| - int firstEscape = position; |
| - List<int> chars = <int>[]; |
| - for (int i = start; i < firstEscape; i++) { |
| - chars.add(source.codeUnitAt(i)); |
| - } |
| - position++; |
| + // Records that the chunk ended inside a string, in sub-state |
| + // [stringState], and returns the chunk end. |
| + int chunkString(int stringState) { |
| +   this.partialState = stringState | PARTIAL_STRING; |
| +   return chunkEnd; |
| + } |
| + |
| + // Records that the chunk ended inside a `\u` escape after [count] hex |
| + // digits with accumulated [value], and returns the chunk end. |
| + int chunkStringEscapeU(int count, int value) { |
| +   int countBits = count << STR_U_COUNT_SHIFT; |
| +   int valueBits = value << STR_U_VALUE_SHIFT; |
| +   this.partialState = PARTIAL_STRING | STR_U | countBits | valueBits; |
| +   return chunkEnd; |
| + } |
| + |
| + int parseStringToBuffer(position) { |
| + int end = chunkEnd; |
| + int start = position; |
| while (true) { |
| - if (position == source.length) { |
| - fail(start - 1, "Unterminated string"); |
| + if (position == end) { |
| + if (position > start) { |
| + addSliceToString(start, position); |
| + } |
| + return chunkString(STR_PLAIN); |
| } |
| - int char = source.codeUnitAt(position); |
| - switch (char) { |
| - case CHAR_b: char = BACKSPACE; break; |
| - case CHAR_f: char = FORM_FEED; break; |
| - case CHAR_n: char = NEWLINE; break; |
| - case CHAR_r: char = CARRIAGE_RETURN; break; |
| - case CHAR_t: char = TAB; break; |
| - case SLASH: |
| - case BACKSLASH: |
| - case QUOTE: |
| - break; |
| - case CHAR_u: |
| - int hexStart = position - 1; |
| - int value = 0; |
| - for (int i = 0; i < 4; i++) { |
| - position++; |
| - if (position == source.length) { |
| - fail(start - 1, "Unterminated string"); |
| - } |
| - char = source.codeUnitAt(position); |
| - char -= 0x30; |
| - if (char < 0) fail(hexStart, "Invalid unicode escape"); |
| - if (char < 10) { |
| - value = value * 16 + char; |
| - } else { |
| - char = (char | 0x20) - 0x31; |
| - if (char < 0 || char > 5) { |
| - fail(hexStart, "Invalid unicode escape"); |
| - } |
| - value = value * 16 + char + 10; |
| - } |
| - } |
| - char = value; |
| - break; |
| - default: |
| - if (char < SPACE) fail(position, "Control character in string"); |
| - fail(position, "Unrecognized string escape"); |
| + int char = getChar(position++); |
| + if (char > BACKSLASH) continue; |
| + if (char < SPACE) { |
| + fail(position - 1); // Control character in string. |
| + return; |
| } |
| - do { |
| - chars.add(char); |
| - position++; |
| - if (position == source.length) fail(start - 1, "Unterminated string"); |
| - char = source.codeUnitAt(position); |
| - if (char == QUOTE) { |
| - String result = new String.fromCharCodes(chars); |
| - listener.handleString(result); |
| - return position + 1; |
| + if (char == QUOTE) { |
| + int quotePosition = position - 1; |
| + if (quotePosition > start) { |
| + addSliceToString(start, quotePosition); |
| } |
| - if (char < SPACE) { |
| - fail(position, "Control character in string"); |
| + listener.handleString(endString()); |
| + return position; |
| + } |
| + if (char != BACKSLASH) { |
| + continue; |
| + } |
| + // Handle escape. |
| + if (position - 1 > start) { |
| + addSliceToString(start, position - 1); |
| + } |
| + if (position == end) return chunkString(STR_ESCAPE); |
| + position = parseStringEscape(position); |
| + if (position == end) return position; |
| + start = position; |
| + } |
| + return -1; // UNREACHABLE. |
| + } |
| + |
| + int parseStringEscape(int position) { |
|
Søren Gjesse
2014/10/24 11:12:24
Add a comment that position is just after the back
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
|
| + int char = getChar(position++); |
| + int length = chunkEnd; |
| + switch (char) { |
| + case CHAR_b: char = BACKSPACE; break; |
| + case CHAR_f: char = FORM_FEED; break; |
| + case CHAR_n: char = NEWLINE; break; |
| + case CHAR_r: char = CARRIAGE_RETURN; break; |
| + case CHAR_t: char = TAB; break; |
| + case SLASH: |
| + case BACKSLASH: |
| + case QUOTE: |
| + break; |
| + case CHAR_u: |
| + int hexStart = position - 1; |
| + int value = 0; |
| + for (int i = 0; i < 4; i++) { |
| + if (position == length) return chunkStringEscapeU(i, value); |
| + char = getChar(position++); |
| + int digit = char ^ 0x30; |
| + value *= 16; |
| + if (digit <= 9) { |
| + value += digit; |
| + } else { |
| + digit = (char | 0x20) - CHAR_a; |
| + if (digit < 0 || digit > 5) { |
| + return fail(hexStart, "Invalid unicode escape"); |
| + } |
| + value += digit + 10; |
| + } |
| } |
| - } while (char != BACKSLASH); |
| - position++; |
| + char = value; |
| + break; |
| + default: |
| + if (char < SPACE) return fail(position, "Control character in string"); |
| + return fail(position, "Unrecognized string escape"); |
| + } |
| + addCharToString(char); |
| + if (position == length) return chunkString(STR_PLAIN); |
| + return position; |
| + } |
| + |
| + /// Sets up a partial numeral state. |
| + /// Returns chunkEnd to allow easy one-line bailout tests. |
| + int beginChunkNumber(int state, int start) { |
| + // Everything from [start] to the end of the chunk is part of the numeral. |
| + int limit = chunkEnd; |
| + int charCount = limit - start; |
| + var numberBuffer = new _NumberBuffer(charCount); |
| + copyCharsToList(start, limit, numberBuffer.list, 0); |
| + numberBuffer.length = charCount; |
| + // Keep the collected characters and the numeral sub-state for later. |
| + this.buffer = numberBuffer; |
| + this.partialState = PARTIAL_NUMERAL | state; |
| + return limit; |
| + } |
| + |
| + void addNumberChunk(_NumberBuffer buffer, int start, int end, int overhead) { |
| + // Append the chunk characters from [start] to [end] after the characters |
| + // already in [buffer], asking for [overhead] extra capacity beyond them. |
| + int oldLength = buffer.length; |
| + int newLength = oldLength + (end - start); |
| + buffer.ensureCapacity(newLength + overhead); |
| + copyCharsToList(start, end, buffer.list, oldLength); |
| + buffer.length = newLength; |
| + } |
| + |
| + // Continues an already chunked number across an entire chunk. |
| + int continueChunkNumber(int state, int start, _NumberBuffer buffer) { |
| + // The numeral runs to the end of this chunk: append it all, stay partial. |
| + addNumberChunk(buffer, start, chunkEnd, _NumberBuffer.kDefaultOverhead); |
| + this.buffer = buffer; |
| + this.partialState = PARTIAL_NUMERAL | state; |
| + return chunkEnd; |
| + } |
| + |
| + /// Completes a numeral that was split across chunks and reports it. |
| + /// |
| + /// [state] is the numeral sub-state reached so far. The characters from |
| + /// [start] to [end] of the current chunk, if any, are appended to [buffer] |
| + /// before it is parsed as an int or a double depending on [state]. |
| + /// Fails with "Unterminated number literal" for any other state. |
| + /// Returns [end] on every path that does not throw. |
| + int finishChunkNumber(int state, int start, int end, _NumberBuffer buffer) { |
| + if (state == NUM_ZERO) { |
| + listener.handleNumber(0); |
| + // This method returns a position, so a bare `return` would yield null. |
| + return end; |
| } |
| + if (end > start) { |
| + addNumberChunk(buffer, start, end, 0); |
| + } |
| + if (state == NUM_DIGIT) { |
| + listener.handleNumber(buffer.parseInt()); |
| + } else if (state == NUM_DOT_DIGIT || state == NUM_E_DIGIT) { |
| + listener.handleNumber(buffer.parseDouble()); |
| + } else { |
| + fail(chunkEnd, "Unterminated number literal"); |
| + } |
| + return end; |
| } |
| int parseNumber(int char, int position) { |
| @@ -487,89 +1039,544 @@ class _JsonParser { |
| // Format: |
| // '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)? |
| int start = position; |
| - int length = source.length; |
| - int intValue = 0; // Collect int value while parsing. |
| - int intSign = 1; |
| + int length = chunkEnd; |
| + int intValue = 0; // Collect int value while parsing. |
| + double doubleValue = 0.0; // Collect double value while parsing. |
| + int sign = 1; |
| bool isDouble = false; |
| // Break this block when the end of the number literal is reached. |
| // At that time, position points to the next character, and isDouble |
| // is set if the literal contains a decimal point or an exponential. |
| parsing: { |
| if (char == MINUS) { |
| - intSign = -1; |
| + sign = -1; |
| position++; |
| - if (position == length) fail(position, "Missing expected digit"); |
| - char = source.codeUnitAt(position); |
| + if (position == length) return beginChunkNumber(NUM_SIGN, start); |
| + char = getChar(position); |
| } |
| - if (char < CHAR_0 || char > CHAR_9) { |
| - if (intSign < 0) { |
| + int digit = char ^ CHAR_0; |
| + if (digit > 9) { |
| + if (sign < 0) { |
| fail(position, "Missing expected digit"); |
| } else { |
| // If it doesn't even start out as a numeral. |
| fail(position, "Unexpected character"); |
| } |
| } |
| - if (char == CHAR_0) { |
| + if (digit == 0) { |
| position++; |
| - if (position == length) break parsing; |
| - char = source.codeUnitAt(position); |
| - if (CHAR_0 <= char && char <= CHAR_9) { |
| - fail(position); |
| - } |
| + if (position == length) return beginChunkNumber(NUM_ZERO, start); |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + // If starting with zero, next character must not be digit. |
| + if (digit <= 9) fail(position); |
| } else { |
| do { |
| - intValue = intValue * 10 + (char - CHAR_0); |
| + intValue = 10 * intValue + digit; |
| position++; |
| - if (position == length) break parsing; |
| - char = source.codeUnitAt(position); |
| - } while (CHAR_0 <= char && char <= CHAR_9); |
| + if (position == length) return beginChunkNumber(NUM_DIGIT, start); |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + } while (digit <= 9); |
| } |
| if (char == DECIMALPOINT) { |
| isDouble = true; |
| + doubleValue = intValue.toDouble(); |
| + intValue = 0; |
| position++; |
| - if (position == length) fail(position, "Missing expected digit"); |
| - char = source.codeUnitAt(position); |
| - if (char < CHAR_0 || char > CHAR_9) fail(position); |
| + if (position == length) return beginChunkNumber(NUM_DOT, start); |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + if (digit > 9) fail(position); |
| do { |
| + doubleValue = 10.0 * doubleValue + digit; |
| + intValue -= 1; |
|
floitsch
2014/10/20 08:52:44
Don't reuse "intValue".
Afaics this has nothing to
Lasse Reichstein Nielsen
2014/10/27 12:42:32
It is collecting *an* integer value.
I'll just do
|
| position++; |
| - if (position == length) break parsing; |
| - char = source.codeUnitAt(position); |
| - } while (CHAR_0 <= char && char <= CHAR_9); |
| + if (position == length) return beginChunkNumber(NUM_DOT_DIGIT, start); |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + } while (digit <= 9); |
| } |
| - if (char == CHAR_e || char == CHAR_E) { |
| - isDouble = true; |
| + if ((char | 0x20) == CHAR_e) { |
| + if (!isDouble) { |
| + doubleValue = intValue.toDouble(); |
| + intValue = 0; |
|
floitsch
2014/10/20 08:52:44
ditto.
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Same.
|
| + isDouble = true; |
| + } |
| position++; |
| - if (position == length) fail(position, "Missing expected digit"); |
| - char = source.codeUnitAt(position); |
| + if (position == length) return beginChunkNumber(NUM_E, start); |
| + char = getChar(position); |
| + int expSign = 1; |
| + int exponent = 0; |
| if (char == PLUS || char == MINUS) { |
| + expSign = 0x2C - char; // -1 for MINUS, +1 for PLUS |
| position++; |
| - if (position == length) fail(position, "Missing expected digit"); |
| - char = source.codeUnitAt(position); |
| + if (position == length) return beginChunkNumber(NUM_E_SIGN, start); |
| + char = getChar(position); |
| } |
| - if (char < CHAR_0 || char > CHAR_9) { |
| + digit = char ^ CHAR_0; |
| + if (digit > 9) { |
| fail(position, "Missing expected digit"); |
| } |
| do { |
| + exponent = 10 * exponent + digit; |
| position++; |
| - if (position == length) break parsing; |
| - char = source.codeUnitAt(position); |
| - } while (CHAR_0 <= char && char <= CHAR_9); |
| + if (position == length) return beginChunkNumber(NUM_E_DIGIT, start); |
| + char = getChar(position); |
| + digit = char ^ CHAR_0; |
| + } while (digit <= 9); |
| + intValue += expSign * exponent; |
| } |
| } |
| if (!isDouble) { |
| - listener.handleNumber(intSign * intValue); |
| + listener.handleNumber(sign * intValue); |
| return position; |
| } |
| - // This correctly creates -0.0 for doubles. |
| - listener.handleNumber(_parseDouble(source, start, position)); |
| + const double maxExactDouble = 9007199254740992.0; |
|
floitsch
2014/10/20 08:52:44
comment.
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
|
| + if (doubleValue < maxExactDouble) { |
| + int exponent = intValue; |
| + double signedMantissa = doubleValue * sign; |
| + if (exponent >= -22) { |
| + if (exponent < 0) { |
| + listener.handleNumber(signedMantissa / POWERS_OF_TEN[-exponent]); |
| + return position; |
| + } |
| + if (exponent == 0) { |
| + listener.handleNumber(signedMantissa); |
| + return position; |
| + } |
| + if (exponent <= 22) { |
| + listener.handleNumber(signedMantissa * POWERS_OF_TEN[exponent]); |
| + return position; |
| + } |
| + } |
| + } |
| + listener.handleNumber(parseDouble(start, position)); |
|
floitsch
2014/10/20 08:52:44
comment.
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
|
| + return position; |
| + } |
| + |
| + int fail(int position, [String message]) { |
| + // Without an explicit message, tell running out of input apart from |
| + // meeting an unexpected character. |
| + if (message == null) { |
| + message = (position == chunkEnd) |
| + ? "Unexpected end of input" |
| + : "Unexpected character"; |
| + } |
| + throw new FormatException(message, chunk, position); |
| + } |
| +} |
| + |
| +/** |
| + * Chunked JSON parser that parses [String] chunks. |
| + */ |
| +class _JsonStringParser extends _ChunkedJsonParser { |
| + // The current input chunk and the position at which it ends. |
| + String chunk; |
| + int chunkEnd; |
| + |
| + _JsonStringParser(_JsonListener listener) : super(listener); |
| + |
| + int getChar(int position) { |
| + return chunk.codeUnitAt(position); |
| + } |
| + |
| + String getString(int start, int end) => chunk.substring(start, end); |
| + |
| + void beginString() { |
| + // String contents are accumulated in a StringBuffer. |
| + this.buffer = new StringBuffer(); |
| + } |
| + |
| + void addSliceToString(int start, int end) { |
| + StringBuffer stringBuffer = this.buffer; |
| + String slice = chunk.substring(start, end); |
| + stringBuffer.write(slice); |
| + } |
| + |
| + void addCharToString(int charCode) { |
| + StringBuffer stringBuffer = this.buffer; |
| + stringBuffer.writeCharCode(charCode); |
| + } |
| + |
| + String endString() { |
| + // Detach the buffer before producing the finished string. |
| + StringBuffer stringBuffer = this.buffer; |
| + this.buffer = null; |
| + return stringBuffer.toString(); |
| + } |
| + |
| + void copyCharsToList(int start, int end, List target, int offset) { |
| + for (int source = start; source < end; source++) { |
| + target[offset + (source - start)] = chunk.codeUnitAt(source); |
| + } |
| + } |
| + |
| + double parseDouble(int start, int end) => _parseDouble(chunk, start, end); |
| +} |
| + |
| +patch class JsonDecoder { |
| + // Chunked decoding is delegated to a sink built from the reviver and [sink]. |
| + /* patch */ StringConversionSink startChunkedConversion(Sink<Object> sink) => |
| + new _JsonStringDecoderSink(this._reviver, sink); |
| +} |
| + |
| +/** |
| + * Implements the chunked conversion from a JSON string to its corresponding |
| + * object. |
| + * |
| + * The sink only creates one object, but its input can be chunked. |
| + */ |
| +class _JsonStringDecoderSink extends StringConversionSinkBase { |
| + _ChunkedJsonParser _parser; |
| + Function _reviver; |
| + final Sink<Object> _sink; |
| + |
| + _JsonStringDecoderSink(reviver, this._sink) |
| + : _reviver = reviver, _parser = _createParser(reviver); |
| + |
| + static _ChunkedJsonParser _createParser(reviver) { |
| + // A reviver, when given, is applied through a reviving listener. |
| + _BuildJsonListener listener = (reviver == null) |
| + ? new _BuildJsonListener() |
| + : new _ReviverJsonListener(reviver); |
| + return new _JsonStringParser(listener); |
| + } |
| + |
| + void addSlice(String chunk, int start, int end, bool isLast) { |
| + // Point the parser at the new chunk, then parse from [start]. |
| + _parser.chunk = chunk; |
| + _parser.chunkEnd = end; |
| + _parser.parse(start); |
| + if (isLast) { |
| + _parser.close(); |
| + } |
| + } |
| + |
| + void add(String chunk) { |
| + int chunkLength = chunk.length; |
| + addSlice(chunk, 0, chunkLength, false); |
| + } |
| + |
| + void close() { |
| + // Finish parsing, then deliver the single decoded object to the sink. |
| + _parser.close(); |
| + _sink.add(_parser.result); |
| + _sink.close(); |
| + } |
| + |
| + Utf8ConversionSink asUtf8Sink(bool allowMalformed) { |
| + // The string parser is dropped; a UTF-8 sink takes over with the same |
| + // reviver and output sink. |
| + _parser = null; |
| + return new _JsonUtf8DecoderSink(_reviver, _sink, allowMalformed); |
| + } |
| +} |
| + |
| +class _Utf8StringBuffer { |
| + static const int INITIAL_CAPACITY = 32; |
| + // Partial state encoding. |
| + static const int MASK_TWO_BIT = 0x03; |
| + static const int MASK_SIZE = MASK_TWO_BIT; |
| + static const int SHIFT_MISSING = 2; |
| + static const int SHIFT_VALUE = 4; |
| + static const int NO_PARTIAL = 0; |
| + |
| + // UTF-8 encoding and limits. |
| + static const int MAX_ASCII = 127; |
| + static const int MAX_TWO_BYTE = 0x7ff; |
| + static const int MAX_THREE_BYTE = 0xffff; |
| + static const int MAX_UNICODE = 0X10ffff; |
| + static const int MASK_TWO_BYTE = 0x1f; |
| + static const int MASK_THREE_BYTE = 0x0f; |
| + static const int MASK_FOUR_BYTE = 0x07; |
| + static const int MASK_CONTINUE_TAG = 0xC0; |
| + static const int MASK_CONTINUE_VALUE = 0x3f; |
| + static const int CONTINUE_TAG = 0x80; |
| + |
| + // UTF-16 surrogate encoding. |
| + static const int LEAD_SURROGATE = 0xD800; |
| + static const int TAIL_SURROGATE = 0xDC00; |
| + static const int SHIFT_HIGH_SURROGATE = 10; |
| + static const int MASK_LOW_SURROGATE = 0x3ff; |
| + |
|
Søren Gjesse
2014/10/24 11:12:24
Comment that buffer starts as Uint8, but might cha
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Comment added. We convert all non-BMP characters t
|
| + List<int> buffer = new Uint8List(INITIAL_CAPACITY); |
| + int length = 0; |
| + int partialState = NO_PARTIAL; |
| + bool isLatin1 = true; |
| + // If allowing malformed, invalid UTF-8 sequences are converted to |
| + // U+FFFD. |
| + bool allowMalformed; |
| + |
| + _Utf8StringBuffer(this.allowMalformed); |
| + |
| + /** |
| + * Parse the continuation of a multi-byte UTF-8 sequence. |
| + * |
| + * Parse [utf8] from [position] to [end]. If the sequence extends beyond |
| + * `end`, store the partial state in [partialState], and continue from there |
| + * on the next added slice. |
| + * |
| + * The [size] is the number of expected continuation bytes total, |
| + * and [missing] is the number of remaining continuation bytes. |
| + * The [size] is used to detect overlong encodings. |
| + * The [value] is the value collected so far. |
| + * |
| + * When called after seeing the first multi-byte marker, the [size] and |
| + * [missing] values are always the same, but they may differ if continuing |
| + * after a partial sequence. |
| + */ |
| + int addContinuation(List<int> utf8, int position, int end, |
| + int size, int missing, int value) { |
| + int codeEnd = position + missing; |
| + do { |
| + if (position == end) { |
| + // Input ended mid-sequence: pack the size, the remaining byte count |
| + // and the bits collected so far into [partialState]. |
| + missing = codeEnd - position; |
| + partialState = |
| + size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE); |
| + return end; |
| + } |
| + int char = utf8[position]; |
| + if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) { |
| + if (allowMalformed) { |
| + // Replace the broken sequence; the offending byte is not consumed. |
| + addCharCode(0xFFFD); |
| + return position; |
| + } |
| + throw new FormatException("Expected UTF-8 continuation byte, " |
| + "found $char", utf8, position); |
| + } |
| + value = 64 * value + (char & MASK_CONTINUE_VALUE); |
| + position++; |
| + } while (position < codeEnd); |
| + if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) { |
| + // Over-long encoding. |
| + if (allowMalformed) { |
| + value = 0xFFFD; |
| + } else { |
| + throw new FormatException( |
| + "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}" |
| + " encoded in ${size + 1} bytes.", utf8, position - 1); |
| + } |
| + } else if (value > MAX_UNICODE) { |
| + // A four-byte sequence can carry values above U+10FFFF. Those are not |
| + // code points and cannot be written as a surrogate pair. |
| + if (allowMalformed) { |
| + value = 0xFFFD; |
| + } else { |
| + throw new FormatException( |
| + "Invalid encoding: 0x${value.toRadixString(16)}" |
| + " is above U+10FFFF.", utf8, position - 1); |
| + } |
| + } |
| + addCharCode(value); |
| return position; |
| } |
| - static double _parseDouble(String source, int start, int end) |
| - native "Double_parse"; |
| + /// Appends [char] to the buffer as one or two 16-bit code units. |
| + /// |
| + /// If a multi-byte sequence is still pending in [partialState], it is |
| + /// replaced by U+FFFD when [allowMalformed] is set; otherwise a |
| + /// [FormatException] is thrown. |
| + void addCharCode(int char) { |
| + assert(char >= 0); |
| + assert(char <= MAX_UNICODE); |
| + if (partialState != NO_PARTIAL) { |
| + if (allowMalformed) { |
| + partialState = NO_PARTIAL; |
| + addCharCode(0xFFFD); |
| + } else { |
| + // No source bytes are in scope here, so only the message is reported. |
| + throw new FormatException("Incomplete UTF-8 sequence"); |
| + } |
| + } |
| + if (isLatin1 && char > 0xff) { |
| + _to16Bit(); // Also grows a little if close to full. |
| + } |
| + int length = this.length; |
| + if (char <= MAX_THREE_BYTE) { |
| + if (length == buffer.length) _grow(); |
| + buffer[length] = char; |
| + this.length = length + 1; |
| + return; |
| + } |
| + // Above U+FFFF: store as a lead/tail surrogate pair. |
| + if (length + 2 > buffer.length) _grow(); |
| + int bits = char - 0x10000; |
| + buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE); |
| + buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE); |
| + this.length = length + 2; |
| + } |
| + |
| + void _to16Bit() { |
| + assert(isLatin1); |
| + // Keep the capacity unless fewer than INITIAL_CAPACITY slots are free. |
| + int freeSlots = buffer.length - length; |
| + int capacity = (freeSlots < INITIAL_CAPACITY) |
| + ? length + INITIAL_CAPACITY |
| + : buffer.length; |
| + Uint16List wide = new Uint16List(capacity); |
| + wide.setRange(0, length, buffer, 0); |
| + buffer = wide; |
| + isLatin1 = false; |
| + } |
| + |
| + void _grow() { |
| + // Double the capacity, keeping the current element width. |
| + int doubled = buffer.length * 2; |
| + List grown = isLatin1 ? new Uint8List(doubled) : new Uint16List(doubled); |
| + grown.setRange(0, length, buffer); |
| + buffer = grown; |
| + } |
| + |
| + /// Decodes the UTF-8 bytes of [utf8] from [position] to [end] into the |
| + /// buffer. |
| + /// |
| + /// Any multi-byte sequence left pending in [partialState] by an earlier |
| + /// call is resumed first. Invalid bytes become U+FFFD when |
| + /// [allowMalformed] is set and throw a [FormatException] otherwise. |
| + void addSlice(List<int> utf8, int position, int end) { |
| + assert(position < end); |
| + if (partialState > 0) { |
| + int continueByteCount = (partialState & MASK_TWO_BIT); |
| + int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT; |
| + int value = partialState >> SHIFT_VALUE; |
| + partialState = NO_PARTIAL; |
| + position = addContinuation(utf8, position, end, |
| + continueByteCount, missing, value); |
| + if (position == end) return; |
| + } |
| + // ASCII bytes are written through local copies of [length] and of the |
| + // buffer capacity. Both are re-synchronized around every call that can |
| + // resize or replace the buffer. |
| + int index = length; |
| + int capacity = buffer.length; |
| + while (position < end) { |
| + int char = utf8[position]; |
| + if (char <= MAX_ASCII) { |
| + if (index == capacity) { |
| + // _grow copies only the first [length] elements, so publish the |
| + // local index before growing, then pick up the new capacity. |
| + length = index; |
| + _grow(); |
| + capacity = buffer.length; |
| + } |
| + buffer[index++] = char; |
| + position++; |
| + continue; |
| + } |
| + length = index; |
| + if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) { |
| + if (allowMalformed) { |
| + addCharCode(0xFFFD); |
| + position++; |
| + } else { |
| + throw new FormatException("Unexpected UTF-8 continuation byte", |
| + utf8, position); |
| + } |
| + } else if (char < 0xE0) { // C0-DF |
| + // Two-byte. |
| + position = addContinuation(utf8, position + 1, end, 1, 1, |
| + char & MASK_TWO_BYTE); |
| + } else if (char < 0xF0) { // E0-EF |
| + // Three-byte. |
| + position = addContinuation(utf8, position + 1, end, 2, 2, |
| + char & MASK_THREE_BYTE); |
| + } else if (char < 0xF8) { // F0-F7 |
| + // Four-byte. |
| + position = addContinuation(utf8, position + 1, end, 3, 3, |
| + char & MASK_FOUR_BYTE); |
| + } else { |
| + if (allowMalformed) { |
| + addCharCode(0xFFFD); |
| + position++; |
| + } else { |
| + throw new FormatException("Invalid UTF-8 byte: $char", |
| + utf8, position); |
| + } |
| + } |
| + index = length; |
| + // addCharCode may have grown or widened the buffer. |
| + capacity = buffer.length; |
| + } |
| + length = index; |
| + } |
| + |
| + // Returns the buffered code units as a string. |
| + // A multi-byte sequence still pending in [partialState] is replaced by |
| + // U+FFFD when [allowMalformed] is set, and otherwise reported by a |
| + // FormatException. |
| + String toString() { |
| + if (partialState != NO_PARTIAL) { |
| + if (allowMalformed) { |
| + partialState = NO_PARTIAL; |
| + addCharCode(0xFFFD); |
| + } else { |
| + // Unpack the pending state the same way addSlice does. |
| + int continueByteCount = (partialState & MASK_TWO_BIT); |
| + int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT; |
| + int value = partialState >> SHIFT_VALUE; |
| + // One lead byte plus the continuation bytes already received. |
| + int seenByteCount = continueByteCount - missing + 1; |
| + List source = new Uint8List(seenByteCount); |
| + // Rebuild the continuation bytes, last one first, from the low six-bit |
| + // groups of the collected value. |
| + while (seenByteCount > 1) { |
| + seenByteCount--; |
| + source[seenByteCount] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE); |
| + value >>= 6; |
| + } |
| + // NOTE(review): presumably relies on Uint8List keeping only the low |
| + // eight bits, so the shifted 0x3c0 acts as the lead-byte tag - confirm. |
| + source[0] = value | (0x3c0 >> (continueByteCount - 1)); |
| + throw new FormatException("Incomplete UTF-8 sequence", |
| + source, source.length); |
| + } |
| + } |
| + return new String.fromCharCodes(buffer, 0, length); |
| + } |
| +} |
| + |
| +/** |
| + * Chunked JSON parser that parses UTF-8 chunks. |
| + */ |
| +class _JsonUtf8Parser extends _ChunkedJsonParser { |
| + // Passed on to every _Utf8StringBuffer created by [beginString]. |
| + final bool allowMalformed; |
| + // The current chunk of UTF-8 bytes and the position at which it ends. |
| + List<int> chunk; |
| + int chunkEnd; |
| + |
| + _JsonUtf8Parser(_JsonListener listener, this.allowMalformed) |
| + : super(listener); |
| + |
| + int getChar(int position) => chunk[position]; |
| + |
| + // Decodes the bytes from [start] to [end] of the chunk into a string, |
| + // going through a fresh string buffer. |
| + String getString(int start, int end) { |
| + beginString(); |
| + addSliceToString(start, end); |
| + String result = endString(); |
| + return result; |
| + } |
| + |
| + void beginString() { |
| + this.buffer = new _Utf8StringBuffer(allowMalformed); |
| + } |
| + |
| + void addSliceToString(int start, int end) { |
| + _Utf8StringBuffer buffer = this.buffer; |
| + buffer.addSlice(chunk, start, end); |
| + } |
| + |
| + void addCharToString(int charCode) { |
| + _Utf8StringBuffer buffer = this.buffer; |
| + buffer.addCharCode(charCode); |
| + } |
| + |
| + String endString() { |
| + _Utf8StringBuffer buffer = this.buffer; |
| + this.buffer = null; |
| + return buffer.toString(); |
| + } |
| + |
| + void copyCharsToList(int start, int end, List target, int offset) { |
| + int length = end - start; |
| + target.setRange(offset, offset + length, chunk, start); |
| + } |
| + |
| + // The native double parser takes a string, so the bytes are decoded first. |
| + double parseDouble(int start, int end) { |
| + String string = getString(start, end); |
| + return _parseDouble(string, 0, string.length); |
| + } |
| +} |
| + |
| +double _parseDouble(String source, int start, int end) |
| + native "Double_parse"; |
| + |
| +/** |
| + * Implements the chunked conversion from a UTF-8 encoding of JSON |
| + * to its corresponding object. |
| + */ |
| +class _JsonUtf8DecoderSink extends ByteConversionSinkBase { |
| + // Same declared type as [_createParser] returns and as the parser field of |
| + // _JsonStringDecoderSink. |
| + _ChunkedJsonParser _parser; |
| + final Sink<Object> _sink; |
| + |
| + _JsonUtf8DecoderSink(reviver, this._sink, bool allowMalformed) |
| + : _parser = _createParser(reviver, allowMalformed); |
| + |
| + // Builds a UTF-8 parser whose listener applies [reviver] when one is given. |
| + static _ChunkedJsonParser _createParser(reviver, bool allowMalformed) { |
| + _BuildJsonListener listener; |
| + if (reviver == null) { |
| + listener = new _BuildJsonListener(); |
| + } else { |
| + listener = new _ReviverJsonListener(reviver); |
| + } |
| + return new _JsonUtf8Parser(listener, allowMalformed); |
| + } |
| + |
| + // Points the parser at [chunk] and parses it from [start] up to [end]. |
| + void addSlice(List<int> chunk, int start, int end, bool isLast) { |
| + _parser.chunk = chunk; |
| + _parser.chunkEnd = end; |
| + _parser.parse(start); |
| + if (isLast) _parser.close(); |
| + } |
| + |
| + void add(List<int> chunk) { |
| + addSlice(chunk, 0, chunk.length, false); |
| + } |
| - void fail(int position, [String message]) { |
| - if (message == null) message = "Unexpected character"; |
| - throw new FormatException(message, source, position); |
| + // Finishes parsing and delivers the single decoded object to the sink. |
| + void close() { |
| + _parser.close(); |
| + var decoded = _parser.result; |
| + _sink.add(decoded); |
| + _sink.close(); |
| } |
| } |