Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 import "dart:_internal" show POWERS_OF_TEN; | |
| 6 | |
| 5 // JSON conversion. | 7 // JSON conversion. |
| 6 | 8 |
| 7 patch _parseJson(String json, reviver(var key, var value)) { | 9 patch _parseJson(String json, reviver(var key, var value)) { |
| 8 _BuildJsonListener listener; | 10 _BuildJsonListener listener; |
| 9 if (reviver == null) { | 11 if (reviver == null) { |
| 10 listener = new _BuildJsonListener(); | 12 listener = new _BuildJsonListener(); |
| 11 } else { | 13 } else { |
| 12 listener = new _ReviverJsonListener(reviver); | 14 listener = new _ReviverJsonListener(reviver); |
| 13 } | 15 } |
| 14 new _JsonParser(json, listener).parse(); | 16 var parser = new _JsonStringParser(listener); |
| 17 parser.chunk = json; | |
| 18 parser.chunkEnd = json.length; | |
| 19 parser.parse(0); | |
| 20 parser.close(); | |
| 15 return listener.result; | 21 return listener.result; |
| 16 } | 22 } |
| 17 | 23 |
| 18 //// Implementation /////////////////////////////////////////////////////////// | 24 //// Implementation /////////////////////////////////////////////////////////// |
| 19 | 25 |
| 20 // Simple API for JSON parsing. | 26 // Simple API for JSON parsing. |
| 21 | 27 |
| 28 /** | |
| 29 * Listener for parsing events from [_ChunkedJsonParser]. | |
| 30 */ | |
| 22 abstract class _JsonListener { | 31 abstract class _JsonListener { |
| 23 void handleString(String value) {} | 32 void handleString(String value) {} |
| 24 void handleNumber(num value) {} | 33 void handleNumber(num value) {} |
| 25 void handleBool(bool value) {} | 34 void handleBool(bool value) {} |
| 26 void handleNull() {} | 35 void handleNull() {} |
| 27 void beginObject() {} | 36 void beginObject() {} |
| 28 void propertyName() {} | 37 void propertyName() {} |
| 29 void propertyValue() {} | 38 void propertyValue() {} |
| 30 void endObject() {} | 39 void endObject() {} |
| 31 void beginArray() {} | 40 void beginArray() {} |
| 32 void arrayElement() {} | 41 void arrayElement() {} |
| 33 void endArray() {} | 42 void endArray() {} |
| 34 } | 43 } |
| 35 | 44 |
| 36 /** | 45 /** |
| 37 * A [JsonListener] that builds data objects from the parser events. | 46 * A [_JsonListener] that builds data objects from the parser events. |
| 38 * | 47 * |
| 39 * This is a simple stack-based object builder. It keeps the most recently | 48 * This is a simple stack-based object builder. It keeps the most recently |
| 40 * seen value in a variable, and uses it depending on the following event. | 49 * seen value in a variable, and uses it depending on the following event. |
| 41 */ | 50 */ |
| 42 class _BuildJsonListener extends _JsonListener { | 51 class _BuildJsonListener extends _JsonListener { |
| 43 /** | 52 /** |
| 44 * Stack used to handle nested containers. | 53 * Stack used to handle nested containers. |
| 45 * | 54 * |
| 46 * The current container is pushed on the stack when a new one is | 55 * The current container is pushed on the stack when a new one is |
| 47 * started. If the container is a [Map], there is also a current [key] | 56 * started. If the container is a [Map], there is also a current [key] |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 128 void propertyValue() { | 137 void propertyValue() { |
| 129 value = reviver(key, value); | 138 value = reviver(key, value); |
| 130 super.propertyValue(); | 139 super.propertyValue(); |
| 131 } | 140 } |
| 132 | 141 |
| 133 get result { | 142 get result { |
| 134 return reviver(null, value); | 143 return reviver(null, value); |
| 135 } | 144 } |
| 136 } | 145 } |
| 137 | 146 |
| 138 class _JsonParser { | 147 /** |
| 148 * Buffer holding parts of a numeral. | |
| 149 * | |
| 150 * The buffer contains the characters of a JSON number. | |
| 151 * These are all ASCII, so an [Uint8List] is used as backing store. | |
| 152 * | |
| 153 * This buffer is used when a JSON number is split between separate chunks. | |
| 154 * | |
| 155 */ | |
| 156 class _NumberBuffer { | |
| 157 static const int kMinCapacity = 16; | |
|
floitsch
2014/10/20 08:52:44
constants in Dart don't start with "k".
maybe they
Lasse Reichstein Nielsen
2014/10/27 12:42:32
I know. The style guide changed so they are no lon
| |
| 158 static const int kDefaultOverhead = 5; | |
| 159 Uint8List list; | |
| 160 int length = 0; | |
| 161 _NumberBuffer(int initialCapacity) | |
| 162 : list = new Uint8List(_initialCapacity(initialCapacity)); | |
| 163 | |
| 164 int get capacity => list.length; | |
| 165 | |
| 166 // Pick an initial capacity greater than the first part's size. | |
| 167 // The typical use case has two parts, this is the attempt at | |
| 168 // guessing the size of the second part without overdoing it. | |
| 169 // The default estimate of the second part is [kDefaultOverhead], | |
| 170 // then round to a multiple of four, and return the result, | |
| 171 // or [kMinCapacity] if that is greater. | |
| 172 static int _initialCapacity(int minCapacity) { | |
| 173 minCapacity += kDefaultOverhead; | |
| 174 if (minCapacity < kMinCapacity) return kMinCapacity; | |
| 175 minCapacity = (minCapacity + 3) & ~3; // Round to multile of four. | |
|
floitsch
2014/10/20 08:52:44
multiple
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
| |
| 176 return minCapacity; | |
| 177 } | |
| 178 | |
| 179 // Grows to the exact size asked for. | |
| 180 void ensureCapacity(int newCapcity) { | |
|
floitsch
2014/10/20 08:52:44
newCapacity
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
| |
| 181 Uint8List list = this.list; | |
| 182 if (newCapcity <= list.length) return; | |
| 183 Uint8List newList = new Uint8List(newCapcity); | |
| 184 newList.setRange(0, list.length, list, 0); | |
| 185 this.list = newList; | |
| 186 } | |
| 187 | |
| 188 String toString() => "NumberBuffer"; | |
|
floitsch
2014/10/20 08:52:44
maybe add the contents?
=> "NumberBuffer(${getStr
Lasse Reichstein Nielsen
2014/10/27 12:42:32
I think I had that for debugging, but I'll just re
| |
| 189 | |
| 190 String getString() { | |
| 191 var list = this.list; | |
| 192 if (length < list.length) { | |
| 193 list = new Uint8List.view(list.buffer, 0, length); | |
| 194 } | |
| 195 String result = new String.fromCharCodes(list); | |
| 196 return result; | |
| 197 } | |
| 198 | |
| 199 // TODO(lrn): See if parsing of numbers can be abstracted to something | |
| 200 // not only working on strings, but also on char-code lists, without losing | |
| 201 // performance. | |
| 202 int parseInt() => int.parse(getString()); | |
| 203 double parseDouble() => double.parse(getString()); | |
| 204 } | |
| 205 | |
| 206 /** | |
| 207 * Chunked JSON parser. | |
| 208 * | |
| 209 * Receives inputs in chunks, gives access to individual parts of the input, | |
| 210 * and stores input state between chunks. | |
| 211 * | |
| 212 * Implementations include [String] and UTF-8 parsers. | |
| 213 */ | |
| 214 abstract class _ChunkedJsonParser { | |
| 139 // A simple non-recursive state-based parser for JSON. | 215 // A simple non-recursive state-based parser for JSON. |
| 140 // | 216 // |
| 141 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON | 217 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON |
| 142 // and strings also in OBJECT_EMPTY, OBJECT_COMMA. | 218 // and strings also in OBJECT_EMPTY, OBJECT_COMMA. |
| 143 // VALUE STRING : , } ] Transitions to | 219 // VALUE STRING : , } ] Transitions to |
| 144 // EMPTY X X -> END | 220 // EMPTY X X -> END |
| 145 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop | 221 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop |
| 146 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop | 222 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop |
| 147 // ARRAY_COMMA X X -> ARRAY_VALUE | 223 // ARRAY_COMMA X X -> ARRAY_VALUE |
| 148 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop | 224 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 165 static const int INSIDE_OBJECT = 2; | 241 static const int INSIDE_OBJECT = 2; |
| 166 static const int AFTER_COLON = 3; // Always inside object. | 242 static const int AFTER_COLON = 3; // Always inside object. |
| 167 | 243 |
| 168 static const int ALLOW_STRING_MASK = 8; // Allowed if zero. | 244 static const int ALLOW_STRING_MASK = 8; // Allowed if zero. |
| 169 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero. | 245 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero. |
| 170 static const int ALLOW_VALUE = 0; | 246 static const int ALLOW_VALUE = 0; |
| 171 static const int STRING_ONLY = 4; | 247 static const int STRING_ONLY = 4; |
| 172 static const int NO_VALUES = 12; | 248 static const int NO_VALUES = 12; |
| 173 | 249 |
| 174 // Objects and arrays are "empty" until their first property/element. | 250 // Objects and arrays are "empty" until their first property/element. |
| 251 // At this position, they may either have an entry or a close-bracket. | |
| 175 static const int EMPTY = 0; | 252 static const int EMPTY = 0; |
| 176 static const int NON_EMPTY = 16; | 253 static const int NON_EMPTY = 16; |
| 177 static const int EMPTY_MASK = 16; // Empty if zero. | 254 static const int EMPTY_MASK = 16; // Empty if zero. |
| 178 | 255 |
| 179 | |
| 180 static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY; | 256 static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY; |
| 181 | 257 |
| 182 // Actual states. | 258 // Actual states. |
| 183 static const int STATE_INITIAL = EMPTY | ALLOW_VALUE; | 259 static const int STATE_INITIAL = EMPTY | ALLOW_VALUE; |
| 184 static const int STATE_END = NON_EMPTY | NO_VALUES; | 260 static const int STATE_END = NON_EMPTY | NO_VALUES; |
| 185 | 261 |
| 186 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY | EMPTY | ALLOW_VALUE; | 262 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY | EMPTY | ALLOW_VALUE; |
| 187 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY | NON_EMPTY | NO_VALUES; | 263 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY | NON_EMPTY | NO_VALUES; |
| 188 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY | NON_EMPTY | ALLOW_VALUE; | 264 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY | NON_EMPTY | ALLOW_VALUE; |
| 189 | 265 |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 219 static const int CHAR_f = 0x66; | 295 static const int CHAR_f = 0x66; |
| 220 static const int CHAR_l = 0x6c; | 296 static const int CHAR_l = 0x6c; |
| 221 static const int CHAR_n = 0x6e; | 297 static const int CHAR_n = 0x6e; |
| 222 static const int CHAR_r = 0x72; | 298 static const int CHAR_r = 0x72; |
| 223 static const int CHAR_s = 0x73; | 299 static const int CHAR_s = 0x73; |
| 224 static const int CHAR_t = 0x74; | 300 static const int CHAR_t = 0x74; |
| 225 static const int CHAR_u = 0x75; | 301 static const int CHAR_u = 0x75; |
| 226 static const int LBRACE = 0x7b; | 302 static const int LBRACE = 0x7b; |
| 227 static const int RBRACE = 0x7d; | 303 static const int RBRACE = 0x7d; |
| 228 | 304 |
| 229 final String source; | 305 // State of partial value at chunk split. |
| 306 static const int NO_PARTIAL = 0; | |
| 307 static const int PARTIAL_STRING = 1; | |
| 308 static const int PARTIAL_NUMERAL = 2; | |
| 309 static const int PARTIAL_KEYWORD = 3; | |
| 310 static const int MASK_PARTIAL = 3; | |
| 311 | |
| 312 // Partial states for numerals. Values can be |'ed with PARTIAL_NUMERAL. | |
| 313 static const int NUM_SIGN = 0; // After initial '-'. | |
| 314 static const int NUM_ZERO = 4; // After '0' as first digit. | |
| 315 static const int NUM_DIGIT = 8; // After digit, no '.' or 'e' seen. | |
| 316 static const int NUM_DOT = 12; // After '.'. | |
| 317 static const int NUM_DOT_DIGIT = 16; // After a decimal digit (after '.'). | |
| 318 static const int NUM_E = 20; // After 'e' or 'E'. | |
| 319 static const int NUM_E_SIGN = 24; // After '-' or '+' after 'e' or 'E'. | |
| 320 static const int NUM_E_DIGIT = 28; // After exponent digit. | |
| 321 static const int NUM_SUCCESS = 32; // Never stored as partial state. | |
| 322 | |
| 323 // Partial states for strings. | |
| 324 static const int STR_PLAIN = 0; // Inside string, but not escape. | |
| 325 static const int STR_ESCAPE = 4; // After '\'. | |
| 326 static const int STR_U = 16; // After '\u' and 0-3 hex digits. | |
| 327 static const int STR_U_COUNT_SHIFT = 2; // Hex digit count in bits 2-3. | |
| 328 static const int STR_U_VALUE_SHIFT = 5; // Hex digit value in bits 5+. | |
| 329 | |
| 330 // Partial states for keywords. | |
| 331 static const int KWD_TYPE_MASK = 12; | |
| 332 static const int KWD_TYPE_SHIFT = 2; | |
| 333 static const int KWD_NULL = 0; // Prefix of "null" seen. | |
| 334 static const int KWD_TRUE = 4; // Prefix of "true" seen. | |
| 335 static const int KWD_FALSE = 8; // Prefix of "false" seen. | |
| 336 static const int KWD_COUNT_SHIFT = 4; // Prefix length in bits 4+. | |
| 337 | |
| 338 // Mask used to mask off two lower bits. | |
| 339 static const int TWO_BIT_MASK = 3; | |
| 340 | |
| 230 final _JsonListener listener; | 341 final _JsonListener listener; |
| 231 _JsonParser(this.source, this.listener); | 342 |
| 343 // The current parsing state. | |
| 344 int state = STATE_INITIAL; | |
| 345 List<int> states = <int>[]; | |
| 346 | |
| 347 /** | |
| 348 * Stores tokenizer state between chunks. | |
| 349 * | |
| 350 * This state is stored when a chunk stops in the middle of a | |
| 351 * token (string, numeral, boolean or null). | |
| 352 * | |
| 353 * The partial state is used to continue parsing on the next chunk. | |
| 354 * The previous chunk is not retained, any data needed are stored in | |
| 355 * this integer, or in the [buffer] field as a string-building buffer | |
| 356 * or a [_NumberBuffer]. | |
| 357 * | |
| 358 * Prefix state stored in [prefixState] as bits. | |
| 359 * | |
| 360 * ..00 : No partial value (NO_PARTIAL). | |
| 361 * | |
| 362 * ..00001 : Partial string, not inside escape. | |
| 363 * ..00101 : Partial string, after '\'. | |
| 364 * ..vvvv1dd01 : Partial \u escape. | |
| 365 * The 'dd' bits (2-3) encode the number of hex digits seen. | |
| 366 * Bits 5-16 encode the value of the hex digits seen so far. | |
| 367 * | |
| 368 * ..0ddd10 : Partial numeral. | |
| 369 * The `ddd` bits store the parts of the numeral seen so | |
| 370 * far, as the constants `NUM_*` defined above. | |
| 371 * The characters of the numeral are stored in [buffer] | |
| 372 * as a [_NumberBuffer]. | |
| 373 * | |
| 374 * ..0ddd0011 : Partial 'null' keyword. | |
| 375 * ..0ddd0111 : Partial 'true' keyword. | |
| 376 * ..0ddd1011 : Partial 'false' keyword. | |
| 377 * For all three keywords, the `ddd` bits encode the number | |
| 378 * of letters seen. | |
| 379 */ | |
| 380 int partialState = NO_PARTIAL; | |
| 381 | |
| 382 /** | |
| 383 * Extra data stored while parsing a primitive value. | |
| 384 * May be set during parsing, always set at chunk end if a value is partial. | |
| 385 * | |
| 386 * May contain a string buffer while parsing strings. | |
| 387 */ | |
| 388 var buffer = null; | |
| 389 | |
| 390 _ChunkedJsonParser(this.listener); | |
| 391 | |
| 392 /** | |
| 393 * Push the current parse [state] on a stack. | |
| 394 * | |
| 395 * State is pushed when a new array or object literal starts, | |
| 396 * so the parser can go back to the correct value when the literal ends. | |
| 397 */ | |
| 398 void saveState(int state) { | |
| 399 states.add(state); | |
| 400 } | |
| 401 | |
| 402 /** | |
| 403 * Restore a state pushed with [saveState]. | |
| 404 */ | |
| 405 int restoreState() { | |
| 406 return states.removeLast(); // Throws if empty. | |
| 407 } | |
| 408 | |
| 409 /** | |
| 410 * Finalizes the parsing. | |
| 411 * | |
| 412 * If the source ends in a number, it will be completed. Any other partial | |
| 413 * state is an error. | |
|
Søren Gjesse
2014/10/24 11:12:24
And the states stack is empty, right?
Lasse Reichstein Nielsen
2014/10/27 12:42:33
That's what the next paragraph tries to say. I'll
| |
| 414 * | |
| 415 * Throws if the source read so far doesn't end up with a complete | |
| 416 * parsed value. | |
| 417 */ | |
| 418 void close() { | |
| 419 if (partialState != NO_PARTIAL) { | |
| 420 int partialType = partialState & MASK_PARTIAL; | |
| 421 if (partialType == PARTIAL_NUMERAL) { | |
| 422 int numState = partialState & ~MASK_PARTIAL; | |
| 423 // A partial number might be a valid number if we know it's done. | |
| 424 // There is an unnecessary overhead if input is a single number, | |
| 425 // but this is assumed to be rare. | |
| 426 _NumberBuffer buffer = this.buffer; | |
| 427 this.buffer = null; | |
| 428 finishChunkNumber(numState, 0, 0, buffer); | |
| 429 } else if (partialType == PARTIAL_STRING) { | |
| 430 fail(chunkEnd, "Unterminate string"); | |
|
Søren Gjesse
2014/10/24 11:12:24
Unterminated
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
| |
| 431 } else { | |
| 432 assert(partialType == PARTIAL_KEYWORD); | |
| 433 fail(chunkEnd); // Incomplete literal. | |
| 434 } | |
| 435 } | |
| 436 if (state != STATE_END) { | |
| 437 fail(chunkEnd); | |
| 438 } | |
| 439 } | |
| 440 | |
| 441 /** | |
| 442 * Read out the result after successfully closing the parser. | |
| 443 * | |
| 444 * The parser is closed by calling [close] or calling [addSourceChunk] with | |
| 445 * `true` as second (`isLast`) argument. | |
| 446 */ | |
| 447 Object get result { | |
| 448 return listener.result; | |
| 449 } | |
| 450 | |
| 451 // Sets the current source chunk. | |
|
floitsch
2014/10/20 08:52:44
Make all these comments dartdocs.
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
| |
| 452 void set chunk(var source); | |
| 453 | |
| 454 // Length of current chunk. | |
| 455 int get chunkEnd; | |
| 456 | |
| 457 // Returns the chunk itself. Used by fail to include it in FormatException. | |
|
Søren Gjesse
2014/10/24 11:12:23
So the FormatException only have the chunk as the
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Yes, that's all we have.
The FormatException will
| |
| 458 get chunk; | |
| 459 | |
| 460 // Get character/code unit of current chunk. | |
| 461 int getChar(int index); | |
| 462 | |
| 463 // Copy ASCII characters from start to end of chunk into a list. | |
| 464 // Used for number buffer (always copies ASCII, so encoding is not important). | |
| 465 void copyCharsToList(int start, int end, List<int> target); | |
| 466 | |
| 467 // Build a string using input code units. Creates a string buffer | |
| 468 // and enables adding characters and slices to that buffer. | |
| 469 // The buffer is stored in [buffer]. If the string is unterminated, | |
| 470 // the same buffer is used to continue parsing in the next chunk. | |
| 471 void beginString(); | |
| 472 // Add single character code to string being built. | |
| 473 void addCharToString(int charCode); | |
| 474 // Adds slice of current chunk to string being built. | |
|
floitsch
2014/10/20 08:52:44
end exclusive?
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Acknowledged.
| |
| 475 void addSliceToString(int start, int end); | |
| 476 // Finalizes the string being built and returns it as a String. | |
| 477 String endString(); | |
| 478 | |
| 479 // Extracts a literal string from a source slice. | |
|
Søren Gjesse
2014/10/24 11:12:23
source slice -> chunk slice
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
| |
| 480 // No interpretation of the content is performed, except for converting | |
| 481 // the source format to string. | |
| 482 // This can be implemented more or less efficiently depending on the | |
| 483 // underlying source. | |
| 484 String getString(int start, int end); | |
| 485 | |
| 486 // Parse a slice of input as an integer. | |
|
Søren Gjesse
2014/10/24 11:12:24
slice of input -> chunk slice
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
| |
| 487 // The format is expected to be correct. | |
| 488 int parseInt(int start, int end) { | |
| 489 return int.parse(getString(start, end)); | |
| 490 } | |
| 491 | |
| 492 // Parse a slice of input as a double. | |
|
Søren Gjesse
2014/10/24 11:12:24
ditto.
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
| |
| 493 // The format is expected to be correct. | |
| 494 double parseDouble(int start, int end) { | |
| 495 return double.parse(getString(start, end)); | |
| 496 } | |
| 497 | |
| 498 // Create a _NumberBuffer containing the digits from [start] to [chunkEnd]. | |
| 499 void createNumberBuffer(int start) { | |
| 500 assert(start >= 0); | |
| 501 assert(start < chunkEnd); | |
| 502 int length = chunkEnd - start; | |
| 503 var buffer = new _NumberBuffer(length); | |
| 504 copyCharsToList(start, chunkEnd, buffer.list); | |
| 505 buffer.length = length; | |
| 506 return buffer; | |
| 507 } | |
| 508 | |
| 509 /** | |
| 510 * Continues parsing a partial value. | |
| 511 */ | |
| 512 int parsePartial(int position) { | |
| 513 if (position == chunkEnd) return position; | |
| 514 int partialState = this.partialState; | |
| 515 assert(partialState != NO_PARTIAL); | |
| 516 int partialType = partialState & MASK_PARTIAL; | |
| 517 this.partialState = NO_PARTIAL; | |
| 518 partialState = partialState & ~MASK_PARTIAL; | |
| 519 assert(partialType != 0); | |
| 520 if (partialType == PARTIAL_STRING) { | |
| 521 position = parsePartialString(position, partialState); | |
| 522 } else if (partialType == PARTIAL_NUMERAL) { | |
| 523 position = parsePartialNumber(position, partialState); | |
| 524 } else if (partialType == PARTIAL_KEYWORD) { | |
| 525 position = parsePartialKeyword(position, partialState); | |
| 526 } | |
| 527 return position; | |
| 528 } | |
| 529 | |
| 530 // Parses the remainder of a number into the number buffer, | |
| 531 // checking syntax as it goes. | |
| 532 // Starts at chunk index 0, and returns the index of the first | |
|
Søren Gjesse
2014/10/24 11:12:24
chunk index 0 -> current chunk index?
Lasse Reichstein Nielsen
2014/10/27 12:42:33
At [position] actually.
| |
| 533 // non-digit character found, or chunkEnd if the entire chunk is | |
| 534 // used. | |
| 535 // Throws if a syntax error is detected. | |
| 536 int parsePartialNumber(int position, int state) { | |
| 537 int start = position; | |
| 538 // Primitive implementation, can be optimized. | |
| 539 _NumberBuffer buffer = this.buffer; | |
| 540 this.buffer = null; | |
| 541 int end = chunkEnd; | |
| 542 toBailout: { | |
| 543 if (position == end) break toBailout; | |
| 544 int char = getChar(position); | |
| 545 int digit = char ^ CHAR_0; | |
| 546 if (state == NUM_SIGN) { | |
| 547 if (digit <= 9) { | |
| 548 if (digit == 0) { | |
| 549 state = NUM_ZERO; | |
| 550 } else { | |
| 551 state = NUM_DIGIT; | |
| 552 } | |
| 553 position++; | |
| 554 if (position == end) break toBailout; | |
| 555 char = getChar(position); | |
| 556 digit = char ^ CHAR_0; | |
| 557 } else { | |
| 558 return fail(position); | |
| 559 } | |
| 560 } | |
| 561 if (state == NUM_ZERO) { | |
| 562 if (digit <= 9) return fail(position); | |
|
floitsch
2014/10/20 08:52:44
Add comment, why this is not allowed.
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
| |
| 563 state = NUM_DIGIT; | |
| 564 } | |
| 565 while (state == NUM_DIGIT) { | |
| 566 if (digit > 9) { | |
| 567 if (char == DECIMALPOINT) { | |
| 568 state = NUM_DOT; | |
| 569 } else if ((char | 0x20) == CHAR_e) { | |
| 570 state = NUM_E; | |
| 571 } else { | |
| 572 finishChunkNumber(state, start, position, buffer); | |
| 573 return position; | |
| 574 } | |
| 575 } | |
| 576 position++; | |
| 577 if (position == end) break toBailout; | |
| 578 char = getChar(position); | |
| 579 digit = char ^ CHAR_0; | |
| 580 } | |
| 581 if (state == NUM_DOT) { | |
| 582 if (digit > 9) return fail(position); | |
| 583 state = NUM_DOT_DIGIT; | |
| 584 } | |
| 585 while (state == NUM_DOT_DIGIT) { | |
| 586 if (digit > 9) { | |
| 587 if ((char | 0x20) == CHAR_e) { | |
| 588 state = NUM_E; | |
| 589 } else { | |
| 590 finishChunkNumber(state, start, position, buffer); | |
| 591 return position; | |
| 592 } | |
| 593 } | |
| 594 position++; | |
| 595 if (position == end) break toBailout; | |
| 596 char = getChar(position); | |
| 597 digit = char ^ CHAR_0; | |
| 598 } | |
| 599 if (state == NUM_E) { | |
| 600 if (char == PLUS || char == MINUS) { | |
| 601 state = NUM_E_SIGN; | |
| 602 position++; | |
| 603 if (position == end) break toBailout; | |
| 604 char = getChar(position); | |
| 605 digit = char ^ CHAR_0; | |
| 606 } | |
| 607 } | |
| 608 assert(state >= NUM_E); | |
| 609 while (digit <= 9) { | |
| 610 state = NUM_E_DIGIT; | |
| 611 position++; | |
| 612 if (position == end) break toBailout; | |
| 613 char = getChar(position); | |
| 614 digit = char ^ CHAR_0; | |
| 615 } | |
| 616 finishChunkNumber(state, start, position, buffer); | |
| 617 return position; | |
| 618 } | |
| 619 // Bailout code in case the current chunk ends while parsing the numeral. | |
| 620 assert(position == end); | |
| 621 continueChunkNumber(state, start, buffer); | |
| 622 return chunkEnd; | |
| 623 } | |
| 624 | |
| 625 int parsePartialString(int position, int partialState) { | |
| 626 if (partialState == STR_PLAIN) { | |
| 627 return parseStringToBuffer(position); | |
| 628 } | |
| 629 if (partialState == STR_ESCAPE) { | |
| 630 position = parseStringEscape(position); | |
| 631 // parseStringEscape sets partialState if it sees the end. | |
| 632 if (position == chunkEnd) return position; | |
| 633 return parseStringToBuffer(position); | |
| 634 } | |
| 635 assert((partialState & STR_U) != 0); | |
| 636 int value = partialState >> STR_U_VALUE_SHIFT; | |
| 637 int count = (partialState >> STR_U_COUNT_SHIFT) & TWO_BIT_MASK; | |
| 638 for (int i = count; i < 4; i++, position++) { | |
| 639 if (position == chunkEnd) return chunkStringEscapeU(i, value); | |
| 640 int char = getChar(position); | |
| 641 int digit = parseHexDigit(char); | |
| 642 if (digit < 0) fail(position, "Invalid hex digit"); | |
| 643 value = 16 * value + digit; | |
| 644 } | |
| 645 addCharToString(value); | |
| 646 return parseStringToBuffer(position); | |
| 647 } | |
| 648 | |
| 649 int parsePartialKeyword(int position, int partialState) { | |
| 650 int keywordType = partialState & KWD_TYPE_MASK; | |
| 651 int count = partialState >> KWD_COUNT_SHIFT; | |
| 652 int keywordTypeIndex = keywordType >> KWD_TYPE_SHIFT; | |
| 653 String keyword = const ["null", "true", "false"][keywordTypeIndex]; | |
| 654 assert(count < keyword.length); | |
| 655 do { | |
| 656 if (position == chunkEnd) { | |
| 657 this.partialState = | |
| 658 PARTIAL_KEYWORD | keywordType | (count << KWD_COUNT_SHIFT); | |
| 659 return chunkEnd; | |
| 660 } | |
| 661 int expectedChar = keyword.codeUnitAt(count); | |
| 662 if (getChar(position) != expectedChar) return fail(position); | |
| 663 position++; | |
| 664 count++; | |
| 665 } while (count < keyword.length); | |
| 666 if (keywordType == KWD_NULL) { | |
| 667 listener.handleNull(); | |
| 668 } else { | |
| 669 listener.handleBool(keywordType == KWD_TRUE); | |
| 670 } | |
| 671 return position; | |
| 672 } | |
| 673 | |
| 674 int parseHexDigit(int char) { | |
| 675 int digit = char ^ 0x30; | |
| 676 if (digit <= 9) return digit; | |
| 677 int letter = (char | 0x20) ^ 0x60; | |
| 678 // values 1 .. 6 are 'a' through 'f' | |
| 679 if (letter <= 6 && letter > 0) return letter + 9; | |
| 680 return -1; | |
| 681 } | |
| 232 | 682 |
| 233 /** Parses [source], or throws if it fails. */ | 683 /** Parses [source], or throws if it fails. */ |
| 234 void parse() { | 684 void parse(int position) { |
| 235 final List<int> states = <int>[]; | 685 int length = chunkEnd; |
| 236 int state = STATE_INITIAL; | 686 if (partialState != NO_PARTIAL) { |
| 237 int position = 0; | 687 position = parsePartial(position); |
| 238 int length = source.length; | 688 if (position == length) return; |
| 689 } | |
| 690 int state = this.state; | |
| 239 while (position < length) { | 691 while (position < length) { |
| 240 int char = source.codeUnitAt(position); | 692 int char = getChar(position); |
| 241 switch (char) { | 693 switch (char) { |
| 242 case SPACE: | 694 case SPACE: |
| 243 case CARRIAGE_RETURN: | 695 case CARRIAGE_RETURN: |
| 244 case NEWLINE: | 696 case NEWLINE: |
| 245 case TAB: | 697 case TAB: |
| 246 position++; | 698 position++; |
| 247 break; | 699 break; |
| 248 case QUOTE: | 700 case QUOTE: |
| 249 if ((state & ALLOW_STRING_MASK) != 0) fail(position); | 701 if ((state & ALLOW_STRING_MASK) != 0) return fail(position); |
| 702 state |= VALUE_READ_BITS; | |
| 250 position = parseString(position + 1); | 703 position = parseString(position + 1); |
| 251 state |= VALUE_READ_BITS; | |
| 252 break; | 704 break; |
| 253 case LBRACKET: | 705 case LBRACKET: |
| 254 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); | 706 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| 255 listener.beginArray(); | 707 listener.beginArray(); |
| 256 states.add(state); | 708 saveState(state); |
| 257 state = STATE_ARRAY_EMPTY; | 709 state = STATE_ARRAY_EMPTY; |
| 258 position++; | 710 position++; |
| 259 break; | 711 break; |
| 260 case LBRACE: | 712 case LBRACE: |
| 261 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); | 713 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| 262 listener.beginObject(); | 714 listener.beginObject(); |
| 263 states.add(state); | 715 saveState(state); |
| 264 state = STATE_OBJECT_EMPTY; | 716 state = STATE_OBJECT_EMPTY; |
| 265 position++; | 717 position++; |
| 266 break; | 718 break; |
| 267 case CHAR_n: | 719 case CHAR_n: |
| 268 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); | 720 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| 721 state |= VALUE_READ_BITS; | |
| 269 position = parseNull(position); | 722 position = parseNull(position); |
| 270 state |= VALUE_READ_BITS; | |
| 271 break; | 723 break; |
| 272 case CHAR_f: | 724 case CHAR_f: |
| 273 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); | 725 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| 726 state |= VALUE_READ_BITS; | |
| 274 position = parseFalse(position); | 727 position = parseFalse(position); |
| 275 state |= VALUE_READ_BITS; | |
| 276 break; | 728 break; |
| 277 case CHAR_t: | 729 case CHAR_t: |
| 278 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); | 730 if ((state & ALLOW_VALUE_MASK) != 0) return fail(position); |
| 731 state |= VALUE_READ_BITS; | |
| 279 position = parseTrue(position); | 732 position = parseTrue(position); |
| 280 state |= VALUE_READ_BITS; | |
| 281 break; | 733 break; |
| 282 case COLON: | 734 case COLON: |
| 283 if (state != STATE_OBJECT_KEY) fail(position); | 735 if (state != STATE_OBJECT_KEY) return fail(position); |
| 284 listener.propertyName(); | 736 listener.propertyName(); |
| 285 state = STATE_OBJECT_COLON; | 737 state = STATE_OBJECT_COLON; |
| 286 position++; | 738 position++; |
| 287 break; | 739 break; |
| 288 case COMMA: | 740 case COMMA: |
| 289 if (state == STATE_OBJECT_VALUE) { | 741 if (state == STATE_OBJECT_VALUE) { |
| 290 listener.propertyValue(); | 742 listener.propertyValue(); |
| 291 state = STATE_OBJECT_COMMA; | 743 state = STATE_OBJECT_COMMA; |
| 292 position++; | 744 position++; |
| 293 } else if (state == STATE_ARRAY_VALUE) { | 745 } else if (state == STATE_ARRAY_VALUE) { |
| 294 listener.arrayElement(); | 746 listener.arrayElement(); |
| 295 state = STATE_ARRAY_COMMA; | 747 state = STATE_ARRAY_COMMA; |
| 296 position++; | 748 position++; |
| 297 } else { | 749 } else { |
| 298 fail(position); | 750 return fail(position); |
| 299 } | 751 } |
| 300 break; | 752 break; |
| 301 case RBRACKET: | 753 case RBRACKET: |
| 302 if (state == STATE_ARRAY_EMPTY) { | 754 if (state == STATE_ARRAY_EMPTY) { |
| 303 listener.endArray(); | 755 listener.endArray(); |
| 304 } else if (state == STATE_ARRAY_VALUE) { | 756 } else if (state == STATE_ARRAY_VALUE) { |
| 305 listener.arrayElement(); | 757 listener.arrayElement(); |
| 306 listener.endArray(); | 758 listener.endArray(); |
| 307 } else { | 759 } else { |
| 308 fail(position); | 760 return fail(position); |
| 309 } | 761 } |
| 310 state = states.removeLast() | VALUE_READ_BITS; | 762 state = restoreState() | VALUE_READ_BITS; |
| 311 position++; | 763 position++; |
| 312 break; | 764 break; |
| 313 case RBRACE: | 765 case RBRACE: |
| 314 if (state == STATE_OBJECT_EMPTY) { | 766 if (state == STATE_OBJECT_EMPTY) { |
| 315 listener.endObject(); | 767 listener.endObject(); |
| 316 } else if (state == STATE_OBJECT_VALUE) { | 768 } else if (state == STATE_OBJECT_VALUE) { |
| 317 listener.propertyValue(); | 769 listener.propertyValue(); |
| 318 listener.endObject(); | 770 listener.endObject(); |
| 319 } else { | 771 } else { |
| 320 fail(position); | 772 return fail(position); |
| 321 } | 773 } |
| 322 state = states.removeLast() | VALUE_READ_BITS; | 774 state = restoreState() | VALUE_READ_BITS; |
| 323 position++; | 775 position++; |
| 324 break; | 776 break; |
| 325 default: | 777 default: |
| 326 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); | 778 if ((state & ALLOW_VALUE_MASK) != 0) fail(position); |
| 779 state |= VALUE_READ_BITS; | |
| 327 position = parseNumber(char, position); | 780 position = parseNumber(char, position); |
| 328 state |= VALUE_READ_BITS; | |
| 329 break; | 781 break; |
| 330 } | 782 } |
| 331 } | 783 } |
| 332 if (state != STATE_END) fail(position); | 784 this.state = state; |
| 333 } | 785 } |
| 334 | 786 |
| 335 /** | 787 /** |
| 336 * Parses a "true" literal starting at [position]. | 788 * Parses a "true" literal starting at [position]. |
| 337 * | 789 * |
| 338 * [:source[position]:] must be "t". | 790 * [:source[position]:] must be "t". |
| 339 */ | 791 */ |
| 340 int parseTrue(int position) { | 792 int parseTrue(int position) { |
| 341 assert(source.codeUnitAt(position) == CHAR_t); | 793 assert(getChar(position) == CHAR_t); |
| 342 if (source.length < position + 4) fail(position, "Unexpected identifier"); | 794 if (chunkEnd < position + 4) { |
| 343 if (source.codeUnitAt(position + 1) != CHAR_r || | 795 return parseKeywordPrefix(position, "true", KWD_TRUE); |
| 344 source.codeUnitAt(position + 2) != CHAR_u || | 796 } |
| 345 source.codeUnitAt(position + 3) != CHAR_e) { | 797 if (getChar(position + 1) != CHAR_r || |
| 346 fail(position); | 798 getChar(position + 2) != CHAR_u || |
| 799 getChar(position + 3) != CHAR_e) { | |
| 800 return fail(position); | |
| 347 } | 801 } |
| 348 listener.handleBool(true); | 802 listener.handleBool(true); |
| 349 return position + 4; | 803 return position + 4; |
| 350 } | 804 } |
| 351 | 805 |
| 352 /** | 806 /** |
| 353 * Parses a "false" literal starting at [position]. | 807 * Parses a "false" literal starting at [position]. |
| 354 * | 808 * |
| 355 * [:source[position]:] must be "f". | 809 * [:source[position]:] must be "f". |
| 356 */ | 810 */ |
| 357 int parseFalse(int position) { | 811 int parseFalse(int position) { |
| 358 assert(source.codeUnitAt(position) == CHAR_f); | 812 assert(getChar(position) == CHAR_f); |
| 359 if (source.length < position + 5) fail(position, "Unexpected identifier"); | 813 if (chunkEnd < position + 5) { |
| 360 if (source.codeUnitAt(position + 1) != CHAR_a || | 814 return parseKeywordPrefix(position, "false", KWD_FALSE); |
| 361 source.codeUnitAt(position + 2) != CHAR_l || | 815 } |
| 362 source.codeUnitAt(position + 3) != CHAR_s || | 816 if (getChar(position + 1) != CHAR_a || |
| 363 source.codeUnitAt(position + 4) != CHAR_e) { | 817 getChar(position + 2) != CHAR_l || |
| 364 fail(position); | 818 getChar(position + 3) != CHAR_s || |
| 819 getChar(position + 4) != CHAR_e) { | |
| 820 return fail(position); | |
| 365 } | 821 } |
| 366 listener.handleBool(false); | 822 listener.handleBool(false); |
| 367 return position + 5; | 823 return position + 5; |
| 368 } | 824 } |
| 369 | 825 |
| 370 /** | 826 /** |
| 371 * Parses a "null" literal starting at [position]. | 827 * Parses a "null" literal starting at [position]. |
| 372 * | 828 * |
| 373 * [:source[position]:] must be "n". | 829 * [:source[position]:] must be "n". |
| 374 */ | 830 */ |
| 375 int parseNull(int position) { | 831 int parseNull(int position) { |
| 376 assert(source.codeUnitAt(position) == CHAR_n); | 832 assert(getChar(position) == CHAR_n); |
| 377 if (source.length < position + 4) fail(position, "Unexpected identifier"); | 833 if (chunkEnd < position + 4) { |
| 378 if (source.codeUnitAt(position + 1) != CHAR_u || | 834 return parseKeywordPrefix(position, "null", KWD_NULL); |
| 379 source.codeUnitAt(position + 2) != CHAR_l || | 835 } |
| 380 source.codeUnitAt(position + 3) != CHAR_l) { | 836 if (getChar(position + 1) != CHAR_u || |
| 381 fail(position); | 837 getChar(position + 2) != CHAR_l || |
| 838 getChar(position + 3) != CHAR_l) { | |
| 839 return fail(position); | |
| 382 } | 840 } |
| 383 listener.handleNull(); | 841 listener.handleNull(); |
| 384 return position + 4; | 842 return position + 4; |
| 385 } | 843 } |
| 386 | 844 |
| 845 int parseKeywordPrefix(int position, String chars, int type) { | |
| 846 assert(getChar(position) == chars.codeUnitAt(0)); | |
| 847 int length = chunkEnd; | |
| 848 int start = position; | |
| 849 int count = 1; | |
| 850 while (++position < length) { | |
| 851 int char = getChar(position); | |
| 852 if (char != chars.codeUnitAt(count)) return fail(start); | |
| 853 count++; | |
| 854 } | |
| 855 this.partialState = PARTIAL_KEYWORD | type | (count << KWD_COUNT_SHIFT); | |
| 856 return length; | |
| 857 } | |
| 858 | |
| 387 /** | 859 /** |
| 388 * Parses a string value. | 860 * Parses a string value. |
| 389 * | 861 * |
| 390 * Initial [position] is right after the initial quote. | 862 * Initial [position] is right after the initial quote. |
| 391 * Returned position right after the final quote. | 863 * Returned position right after the final quote. |
| 392 */ | 864 */ |
| 393 int parseString(int position) { | 865 int parseString(int position) { |
| 394 // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"' | 866 // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"' |
| 395 // Initial position is right after first '"'. | 867 // Initial position is right after first '"'. |
| 396 int start = position; | 868 int start = position; |
| 397 while (position < source.length) { | 869 int end = chunkEnd; |
| 398 int char = source.codeUnitAt(position++); | 870 while (position < end) { |
| 871 int char = getChar(position++); | |
| 399 // BACKSLASH is larger than QUOTE and SPACE. | 872 // BACKSLASH is larger than QUOTE and SPACE. |
| 400 if (char > BACKSLASH) { | 873 if (char > BACKSLASH) { |
| 401 continue; | 874 continue; |
| 402 } | 875 } |
| 403 if (char == BACKSLASH) { | 876 if (char == BACKSLASH) { |
| 404 return parseStringWithEscapes(start, position - 1); | 877 beginString(); |
| 878 addSliceToString(start, position - 1); | |
| 879 return parseStringToBuffer(position - 1); | |
| 405 } | 880 } |
| 406 if (char == QUOTE) { | 881 if (char == QUOTE) { |
| 407 listener.handleString(source.substring(start, position - 1)); | 882 listener.handleString(getString(start, position - 1)); |
| 408 return position; | 883 return position; |
| 409 } | 884 } |
| 410 if (char < SPACE) { | 885 if (char < SPACE) { |
| 411 fail(position - 1, "Control character in string"); | 886 fail(position - 1, "Control character in string"); |
| 412 } | 887 } |
| 413 } | 888 } |
| 414 fail(start - 1, "Unterminated string"); | 889 beginString(); |
| 890 addSliceToString(start, end); | |
| 891 return chunkString(STR_PLAIN); | |
| 415 } | 892 } |
| 416 | 893 |
| 417 int parseStringWithEscapes(start, position) { | 894 int chunkString(int stringState) { |
| 418 // Backslash escape detected. Collect character codes for rest of string. | 895 partialState = PARTIAL_STRING | stringState; |
| 419 int firstEscape = position; | 896 return chunkEnd; |
| 420 List<int> chars = <int>[]; | 897 } |
| 421 for (int i = start; i < firstEscape; i++) { | 898 |
| 422 chars.add(source.codeUnitAt(i)); | 899 int chunkStringEscapeU(int count, int value) { |
| 900 partialState = PARTIAL_STRING | STR_U | | |
| 901 (count << STR_U_COUNT_SHIFT) | | |
| 902 (value << STR_U_VALUE_SHIFT); | |
| 903 return chunkEnd; | |
| 904 } | |
| 905 | |
| 906 int parseStringToBuffer(position) { | |
| 907 int end = chunkEnd; | |
| 908 int start = position; | |
| 909 while (true) { | |
| 910 if (position == end) { | |
| 911 if (position > start) { | |
| 912 addSliceToString(start, position); | |
| 913 } | |
| 914 return chunkString(STR_PLAIN); | |
| 915 } | |
| 916 int char = getChar(position++); | |
| 917 if (char > BACKSLASH) continue; | |
| 918 if (char < SPACE) { | |
| 919 fail(position - 1); // Control character in string. | |
| 920 return; | |
| 921 } | |
| 922 if (char == QUOTE) { | |
| 923 int quotePosition = position - 1; | |
| 924 if (quotePosition > start) { | |
| 925 addSliceToString(start, quotePosition); | |
| 926 } | |
| 927 listener.handleString(endString()); | |
| 928 return position; | |
| 929 } | |
| 930 if (char != BACKSLASH) { | |
| 931 continue; | |
| 932 } | |
| 933 // Handle escape. | |
| 934 if (position - 1 > start) { | |
| 935 addSliceToString(start, position - 1); | |
| 936 } | |
| 937 if (position == end) return chunkString(STR_ESCAPE); | |
| 938 position = parseStringEscape(position); | |
| 939 if (position == end) return position; | |
| 940 start = position; | |
| 423 } | 941 } |
| 424 position++; | 942 return -1; // UNREACHABLE. |
| 425 while (true) { | 943 } |
| 426 if (position == source.length) { | 944 |
| 427 fail(start - 1, "Unterminated string"); | 945 int parseStringEscape(int position) { |
|
Søren Gjesse
2014/10/24 11:12:24
Add a comment that position is just after the back
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
| |
| 428 } | 946 int char = getChar(position++); |
| 429 int char = source.codeUnitAt(position); | 947 int length = chunkEnd; |
| 430 switch (char) { | 948 switch (char) { |
| 431 case CHAR_b: char = BACKSPACE; break; | 949 case CHAR_b: char = BACKSPACE; break; |
| 432 case CHAR_f: char = FORM_FEED; break; | 950 case CHAR_f: char = FORM_FEED; break; |
| 433 case CHAR_n: char = NEWLINE; break; | 951 case CHAR_n: char = NEWLINE; break; |
| 434 case CHAR_r: char = CARRIAGE_RETURN; break; | 952 case CHAR_r: char = CARRIAGE_RETURN; break; |
| 435 case CHAR_t: char = TAB; break; | 953 case CHAR_t: char = TAB; break; |
| 436 case SLASH: | 954 case SLASH: |
| 437 case BACKSLASH: | 955 case BACKSLASH: |
| 438 case QUOTE: | 956 case QUOTE: |
| 439 break; | 957 break; |
| 440 case CHAR_u: | 958 case CHAR_u: |
| 441 int hexStart = position - 1; | 959 int hexStart = position - 1; |
| 442 int value = 0; | 960 int value = 0; |
| 443 for (int i = 0; i < 4; i++) { | 961 for (int i = 0; i < 4; i++) { |
| 444 position++; | 962 if (position == length) return chunkStringEscapeU(i, value); |
| 445 if (position == source.length) { | 963 char = getChar(position++); |
| 446 fail(start - 1, "Unterminated string"); | 964 int digit = char ^ 0x30; |
| 965 value *= 16; | |
| 966 if (digit <= 9) { | |
| 967 value += digit; | |
| 968 } else { | |
| 969 digit = (char | 0x20) - CHAR_a; | |
| 970 if (digit < 0 || digit > 5) { | |
| 971 return fail(hexStart, "Invalid unicode escape"); | |
| 447 } | 972 } |
| 448 char = source.codeUnitAt(position); | 973 value += digit + 10; |
| 449 char -= 0x30; | |
| 450 if (char < 0) fail(hexStart, "Invalid unicode escape"); | |
| 451 if (char < 10) { | |
| 452 value = value * 16 + char; | |
| 453 } else { | |
| 454 char = (char | 0x20) - 0x31; | |
| 455 if (char < 0 || char > 5) { | |
| 456 fail(hexStart, "Invalid unicode escape"); | |
| 457 } | |
| 458 value = value * 16 + char + 10; | |
| 459 } | |
| 460 } | 974 } |
| 461 char = value; | |
| 462 break; | |
| 463 default: | |
| 464 if (char < SPACE) fail(position, "Control character in string"); | |
| 465 fail(position, "Unrecognized string escape"); | |
| 466 } | |
| 467 do { | |
| 468 chars.add(char); | |
| 469 position++; | |
| 470 if (position == source.length) fail(start - 1, "Unterminated string"); | |
| 471 char = source.codeUnitAt(position); | |
| 472 if (char == QUOTE) { | |
| 473 String result = new String.fromCharCodes(chars); | |
| 474 listener.handleString(result); | |
| 475 return position + 1; | |
| 476 } | 975 } |
| 477 if (char < SPACE) { | 976 char = value; |
| 478 fail(position, "Control character in string"); | 977 break; |
| 479 } | 978 default: |
| 480 } while (char != BACKSLASH); | 979 if (char < SPACE) return fail(position, "Control character in string"); |
| 481 position++; | 980 return fail(position, "Unrecognized string escape"); |
| 482 } | 981 } |
| 982 addCharToString(char); | |
| 983 if (position == length) return chunkString(STR_PLAIN); | |
| 984 return position; | |
| 985 } | |
| 986 | |
| 987 /// Sets up a partial numeral state. | |
| 988 /// Returns chunkEnd to allow easy one-line bailout tests. | |
| 989 int beginChunkNumber(int state, int start) { | |
| 990 int end = chunkEnd; | |
| 991 int length = end - start; | |
| 992 var buffer = new _NumberBuffer(length); | |
| 993 copyCharsToList(start, end, buffer.list, 0); | |
| 994 buffer.length = length; | |
| 995 this.buffer = buffer; | |
| 996 this.partialState = PARTIAL_NUMERAL | state; | |
| 997 return end; | |
| 998 } | |
| 999 | |
| 1000 void addNumberChunk(_NumberBuffer buffer, int start, int end, int overhead) { | |
| 1001 int length = end - start; | |
| 1002 int count = buffer.length; | |
| 1003 int newCount = count + length; | |
| 1004 int newCapacity = newCount + overhead; | |
| 1005 buffer.ensureCapacity(newCapacity); | |
| 1006 copyCharsToList(start, end, buffer.list, count); | |
| 1007 buffer.length = newCount; | |
| 1008 } | |
| 1009 | |
| 1010 // Continues an already chunked number across an entire chunk. | |
| 1011 int continueChunkNumber(int state, int start, _NumberBuffer buffer) { | |
| 1012 int end = chunkEnd; | |
| 1013 addNumberChunk(buffer, start, end, _NumberBuffer.kDefaultOverhead); | |
| 1014 this.buffer = buffer; | |
| 1015 this.partialState = PARTIAL_NUMERAL | state; | |
| 1016 return end; | |
| 1017 } | |
| 1018 | |
| 1019 int finishChunkNumber(int state, int start, int end, _NumberBuffer buffer) { | |
| 1020 if (state == NUM_ZERO) { | |
| 1021 listener.handleNumber(0); | |
| 1022 return; | |
| 1023 } | |
| 1024 if (end > start) { | |
| 1025 addNumberChunk(buffer, start, end, 0); | |
| 1026 } | |
| 1027 if (state == NUM_DIGIT) { | |
| 1028 listener.handleNumber(buffer.parseInt()); | |
| 1029 } else if (state == NUM_DOT_DIGIT || state == NUM_E_DIGIT) { | |
| 1030 listener.handleNumber(buffer.parseDouble()); | |
| 1031 } else { | |
| 1032 fail(chunkEnd, "Unterminated number literal"); | |
| 1033 } | |
| 1034 return end; | |
| 483 } | 1035 } |
| 484 | 1036 |
| 485 int parseNumber(int char, int position) { | 1037 int parseNumber(int char, int position) { |
| 486 // Also called on any unexpected character. | 1038 // Also called on any unexpected character. |
| 487 // Format: | 1039 // Format: |
| 488 // '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)? | 1040 // '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)? |
| 489 int start = position; | 1041 int start = position; |
| 490 int length = source.length; | 1042 int length = chunkEnd; |
| 491 int intValue = 0; // Collect int value while parsing. | 1043 int intValue = 0; // Collect int value while parsing. |
| 492 int intSign = 1; | 1044 double doubleValue = 0.0; // Collect double value while parsing. |
| 1045 int sign = 1; | |
| 493 bool isDouble = false; | 1046 bool isDouble = false; |
| 494 // Break this block when the end of the number literal is reached. | 1047 // Break this block when the end of the number literal is reached. |
| 495 // At that time, position points to the next character, and isDouble | 1048 // At that time, position points to the next character, and isDouble |
| 496 // is set if the literal contains a decimal point or an exponential. | 1049 // is set if the literal contains a decimal point or an exponential. |
| 497 parsing: { | 1050 parsing: { |
| 498 if (char == MINUS) { | 1051 if (char == MINUS) { |
| 499 intSign = -1; | 1052 sign = -1; |
| 500 position++; | 1053 position++; |
| 501 if (position == length) fail(position, "Missing expected digit"); | 1054 if (position == length) return beginChunkNumber(NUM_SIGN, start); |
| 502 char = source.codeUnitAt(position); | 1055 char = getChar(position); |
| 503 } | 1056 } |
| 504 if (char < CHAR_0 || char > CHAR_9) { | 1057 int digit = char ^ CHAR_0; |
| 505 if (intSign < 0) { | 1058 if (digit > 9) { |
| 1059 if (sign < 0) { | |
| 506 fail(position, "Missing expected digit"); | 1060 fail(position, "Missing expected digit"); |
| 507 } else { | 1061 } else { |
| 508 // If it doesn't even start out as a numeral. | 1062 // If it doesn't even start out as a numeral. |
| 509 fail(position, "Unexpected character"); | 1063 fail(position, "Unexpected character"); |
| 510 } | 1064 } |
| 511 } | 1065 } |
| 512 if (char == CHAR_0) { | 1066 if (digit == 0) { |
| 513 position++; | 1067 position++; |
| 514 if (position == length) break parsing; | 1068 if (position == length) return beginChunkNumber(NUM_ZERO, start); |
| 515 char = source.codeUnitAt(position); | 1069 char = getChar(position); |
| 516 if (CHAR_0 <= char && char <= CHAR_9) { | 1070 digit = char ^ CHAR_0; |
| 517 fail(position); | 1071 // If starting with zero, next character must not be digit. |
| 518 } | 1072 if (digit <= 9) fail(position); |
| 519 } else { | 1073 } else { |
| 520 do { | 1074 do { |
| 521 intValue = intValue * 10 + (char - CHAR_0); | 1075 intValue = 10 * intValue + digit; |
| 522 position++; | 1076 position++; |
| 523 if (position == length) break parsing; | 1077 if (position == length) return beginChunkNumber(NUM_DIGIT, start); |
| 524 char = source.codeUnitAt(position); | 1078 char = getChar(position); |
| 525 } while (CHAR_0 <= char && char <= CHAR_9); | 1079 digit = char ^ CHAR_0; |
| 1080 } while (digit <= 9); | |
| 526 } | 1081 } |
| 527 if (char == DECIMALPOINT) { | 1082 if (char == DECIMALPOINT) { |
| 528 isDouble = true; | 1083 isDouble = true; |
| 1084 doubleValue = intValue.toDouble(); | |
| 1085 intValue = 0; | |
| 529 position++; | 1086 position++; |
| 530 if (position == length) fail(position, "Missing expected digit"); | 1087 if (position == length) return beginChunkNumber(NUM_DOT, start); |
| 531 char = source.codeUnitAt(position); | 1088 char = getChar(position); |
| 532 if (char < CHAR_0 || char > CHAR_9) fail(position); | 1089 digit = char ^ CHAR_0; |
| 1090 if (digit > 9) fail(position); | |
| 533 do { | 1091 do { |
| 1092 doubleValue = 10.0 * doubleValue + digit; | |
| 1093 intValue -= 1; | |
|
floitsch
2014/10/20 08:52:44
Don't reuse "intValue".
Afaics this has nothing to
Lasse Reichstein Nielsen
2014/10/27 12:42:32
It is collecting *an* integer value.
I'll just do
| |
| 534 position++; | 1094 position++; |
| 535 if (position == length) break parsing; | 1095 if (position == length) return beginChunkNumber(NUM_DOT_DIGIT, start); |
| 536 char = source.codeUnitAt(position); | 1096 char = getChar(position); |
| 537 } while (CHAR_0 <= char && char <= CHAR_9); | 1097 digit = char ^ CHAR_0; |
| 538 } | 1098 } while (digit <= 9); |
| 539 if (char == CHAR_e || char == CHAR_E) { | 1099 } |
| 540 isDouble = true; | 1100 if ((char | 0x20) == CHAR_e) { |
| 1101 if (!isDouble) { | |
| 1102 doubleValue = intValue.toDouble(); | |
| 1103 intValue = 0; | |
|
floitsch
2014/10/20 08:52:44
ditto.
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Same.
| |
| 1104 isDouble = true; | |
| 1105 } | |
| 541 position++; | 1106 position++; |
| 542 if (position == length) fail(position, "Missing expected digit"); | 1107 if (position == length) return beginChunkNumber(NUM_E, start); |
| 543 char = source.codeUnitAt(position); | 1108 char = getChar(position); |
| 1109 int expSign = 1; | |
| 1110 int exponent = 0; | |
| 544 if (char == PLUS || char == MINUS) { | 1111 if (char == PLUS || char == MINUS) { |
| 1112 expSign = 0x2C - char; // -1 for MINUS, +1 for PLUS | |
| 545 position++; | 1113 position++; |
| 546 if (position == length) fail(position, "Missing expected digit"); | 1114 if (position == length) return beginChunkNumber(NUM_E_SIGN, start); |
| 547 char = source.codeUnitAt(position); | 1115 char = getChar(position); |
| 548 } | 1116 } |
| 549 if (char < CHAR_0 || char > CHAR_9) { | 1117 digit = char ^ CHAR_0; |
| 1118 if (digit > 9) { | |
| 550 fail(position, "Missing expected digit"); | 1119 fail(position, "Missing expected digit"); |
| 551 } | 1120 } |
| 552 do { | 1121 do { |
| 1122 exponent = 10 * exponent + digit; | |
| 553 position++; | 1123 position++; |
| 554 if (position == length) break parsing; | 1124 if (position == length) return beginChunkNumber(NUM_E_DIGIT, start); |
| 555 char = source.codeUnitAt(position); | 1125 char = getChar(position); |
| 556 } while (CHAR_0 <= char && char <= CHAR_9); | 1126 digit = char ^ CHAR_0; |
| 1127 } while (digit <= 9); | |
| 1128 intValue += expSign * exponent; | |
| 557 } | 1129 } |
| 558 } | 1130 } |
| 559 if (!isDouble) { | 1131 if (!isDouble) { |
| 560 listener.handleNumber(intSign * intValue); | 1132 listener.handleNumber(sign * intValue); |
| 561 return position; | 1133 return position; |
| 562 } | 1134 } |
| 563 // This correctly creates -0.0 for doubles. | 1135 const double maxExactDouble = 9007199254740992.0; |
|
floitsch
2014/10/20 08:52:44
comment.
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Done.
| |
| 564 listener.handleNumber(_parseDouble(source, start, position)); | 1136 if (doubleValue < maxExactDouble) { |
| 1137 int exponent = intValue; | |
| 1138 double signedMantissa = doubleValue * sign; | |
| 1139 if (exponent >= -22) { | |
| 1140 if (exponent < 0) { | |
| 1141 listener.handleNumber(signedMantissa / POWERS_OF_TEN[-exponent]); | |
| 1142 return position; | |
| 1143 } | |
| 1144 if (exponent == 0) { | |
| 1145 listener.handleNumber(signedMantissa); | |
| 1146 return position; | |
| 1147 } | |
| 1148 if (exponent <= 22) { | |
| 1149 listener.handleNumber(signedMantissa * POWERS_OF_TEN[exponent]); | |
| 1150 return position; | |
| 1151 } | |
| 1152 } | |
| 1153 } | |
| 1154 listener.handleNumber(parseDouble(start, position)); | |
|
floitsch
2014/10/20 08:52:44
comment.
Lasse Reichstein Nielsen
2014/10/27 12:42:33
Done.
| |
| 565 return position; | 1155 return position; |
| 566 } | 1156 } |
| 567 | 1157 |
| 568 static double _parseDouble(String source, int start, int end) | 1158 int fail(int position, [String message]) { |
| 569 native "Double_parse"; | 1159 if (message == null) { |
| 570 | 1160 message = "Unexpected character"; |
| 571 void fail(int position, [String message]) { | 1161 if (position == chunkEnd) message = "Unexpected end of input"; |
| 572 if (message == null) message = "Unexpected character"; | 1162 } |
| 573 throw new FormatException(message, source, position); | 1163 throw new FormatException(message, chunk, position); |
| 574 } | 1164 } |
| 575 } | 1165 } |
| 1166 | |
| 1167 /** | |
| 1168 * Chunked JSON parser that parses [String] chunks. | |
| 1169 */ | |
| 1170 class _JsonStringParser extends _ChunkedJsonParser { | |
| 1171 String chunk; | |
| 1172 int chunkEnd; | |
| 1173 | |
| 1174 _JsonStringParser(_JsonListener listener) : super(listener); | |
| 1175 | |
| 1176 int getChar(int position) => chunk.codeUnitAt(position); | |
| 1177 | |
| 1178 String getString(int start, int end) { | |
| 1179 return chunk.substring(start, end); | |
| 1180 } | |
| 1181 | |
| 1182 void beginString() { | |
| 1183 this.buffer = new StringBuffer(); | |
| 1184 } | |
| 1185 | |
| 1186 void addSliceToString(int start, int end) { | |
| 1187 StringBuffer buffer = this.buffer; | |
| 1188 buffer.write(chunk.substring(start, end)); | |
| 1189 } | |
| 1190 | |
| 1191 void addCharToString(int charCode) { | |
| 1192 StringBuffer buffer = this.buffer; | |
| 1193 buffer.writeCharCode(charCode); | |
| 1194 } | |
| 1195 | |
| 1196 String endString() { | |
| 1197 StringBuffer buffer = this.buffer; | |
| 1198 this.buffer = null; | |
| 1199 return buffer.toString(); | |
| 1200 } | |
| 1201 | |
| 1202 void copyCharsToList(int start, int end, List target, int offset) { | |
| 1203 int length = end - start; | |
| 1204 for (int i = 0; i < length; i++) { | |
| 1205 target[offset + i] = chunk.codeUnitAt(start + i); | |
| 1206 } | |
| 1207 } | |
| 1208 | |
| 1209 double parseDouble(int start, int end) { | |
| 1210 return _parseDouble(chunk, start, end); | |
| 1211 } | |
| 1212 } | |
| 1213 | |
| 1214 patch class JsonDecoder { | |
| 1215 /* patch */ StringConversionSink startChunkedConversion(Sink<Object> sink) { | |
| 1216 return new _JsonStringDecoderSink(this._reviver, sink); | |
| 1217 } | |
| 1218 } | |
| 1219 | |
| 1220 /** | |
| 1221 * Implements the chunked conversion from a JSON string to its corresponding | |
| 1222 * object. | |
| 1223 * | |
| 1224 * The sink only creates one object, but its input can be chunked. | |
| 1225 */ | |
| 1226 class _JsonStringDecoderSink extends StringConversionSinkBase { | |
| 1227 _ChunkedJsonParser _parser; | |
| 1228 Function _reviver; | |
| 1229 final Sink<Object> _sink; | |
| 1230 | |
| 1231 _JsonStringDecoderSink(reviver, this._sink) | |
| 1232 : _reviver = reviver, _parser = _createParser(reviver); | |
| 1233 | |
| 1234 static _ChunkedJsonParser _createParser(reviver) { | |
| 1235 _BuildJsonListener listener; | |
| 1236 if (reviver == null) { | |
| 1237 listener = new _BuildJsonListener(); | |
| 1238 } else { | |
| 1239 listener = new _ReviverJsonListener(reviver); | |
| 1240 } | |
| 1241 return new _JsonStringParser(listener); | |
| 1242 } | |
| 1243 | |
| 1244 void addSlice(String chunk, int start, int end, bool isLast) { | |
| 1245 _parser.chunk = chunk; | |
| 1246 _parser.chunkEnd = end; | |
| 1247 _parser.parse(start); | |
| 1248 if (isLast) _parser.close(); | |
| 1249 } | |
| 1250 | |
| 1251 void add(String chunk) { | |
| 1252 addSlice(chunk, 0, chunk.length, false); | |
| 1253 } | |
| 1254 | |
| 1255 void close() { | |
| 1256 _parser.close(); | |
| 1257 var decoded = _parser.result; | |
| 1258 _sink.add(decoded); | |
| 1259 _sink.close(); | |
| 1260 } | |
| 1261 | |
| 1262 Utf8ConversionSink asUtf8Sink(bool allowMalformed) { | |
| 1263 _parser = null; | |
| 1264 return new _JsonUtf8DecoderSink(_reviver, _sink, allowMalformed); | |
| 1265 } | |
| 1266 } | |
| 1267 | |
| 1268 class _Utf8StringBuffer { | |
| 1269 static const int INITIAL_CAPACITY = 32; | |
| 1270 // Partial state encoding. | |
| 1271 static const int MASK_TWO_BIT = 0x03; | |
| 1272 static const int MASK_SIZE = MASK_TWO_BIT; | |
| 1273 static const int SHIFT_MISSING = 2; | |
| 1274 static const int SHIFT_VALUE = 4; | |
| 1275 static const int NO_PARTIAL = 0; | |
| 1276 | |
| 1277 // UTF-8 encoding and limits. | |
| 1278 static const int MAX_ASCII = 127; | |
| 1279 static const int MAX_TWO_BYTE = 0x7ff; | |
| 1280 static const int MAX_THREE_BYTE = 0xffff; | |
| 1281 static const int MAX_UNICODE = 0X10ffff; | |
| 1282 static const int MASK_TWO_BYTE = 0x1f; | |
| 1283 static const int MASK_THREE_BYTE = 0x0f; | |
| 1284 static const int MASK_FOUR_BYTE = 0x07; | |
| 1285 static const int MASK_CONTINUE_TAG = 0xC0; | |
| 1286 static const int MASK_CONTINUE_VALUE = 0x3f; | |
| 1287 static const int CONTINUE_TAG = 0x80; | |
| 1288 | |
| 1289 // UTF-16 surrogate encoding. | |
| 1290 static const int LEAD_SURROGATE = 0xD800; | |
| 1291 static const int TAIL_SURROGATE = 0xDC00; | |
| 1292 static const int SHIFT_HIGH_SURROGATE = 10; | |
| 1293 static const int MASK_LOW_SURROGATE = 0x3ff; | |
| 1294 | |
|
Søren Gjesse
2014/10/24 11:12:24
Comment that buffer starts as Uint8, but might cha
Lasse Reichstein Nielsen
2014/10/27 12:42:32
Comment added. We convert all non-BMP characters t
| |
| 1295 List<int> buffer = new Uint8List(INITIAL_CAPACITY); | |
| 1296 int length = 0; | |
| 1297 int partialState = NO_PARTIAL; | |
| 1298 bool isLatin1 = true; | |
| 1299 // If allowing malformed, invalid UTF-8 sequences are converted to | |
| 1300 // U+FFFD. | |
| 1301 bool allowMalformed; | |
| 1302 | |
| 1303 _Utf8StringBuffer(this.allowMalformed); | |
| 1304 | |
| 1305 /** | |
| 1306 * Parse the continuation of a multi-byte UTF-8 sequence. | |
| 1307 * | |
| 1308 * Parse [utf8] from [position] to [end]. If the sequence extends beyond | |
| 1309 * `end`, store the partial state in [partialState], and continue from there | |
| 1310 * on the next added slice. | |
| 1311 * | |
| 1312 * The [size] is the number of expected continuation bytes total, | |
| 1313 * and [missing] is the number of remaining continuation bytes. | |
| 1314 * The [size] is used to detect overlong encodings. | |
| 1315 * The [value] is the value collected so far. | |
| 1316 * | |
| 1317 * When called after seeing the first multi-byte marker, the [size] and | |
| 1318 * [missing] values are always the same, but they may differ if continuing | |
| 1319 * after a partial sequence. | |
| 1320 */ | |
| 1321 int addContinuation(List<int> utf8, int position, int end, | |
| 1322 int size, int missing, int value) { | |
| 1323 int codeEnd = position + missing; | |
| 1324 do { | |
| 1325 if (position == end) { | |
| 1326 missing = codeEnd - position; | |
| 1327 partialState = | |
| 1328 size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE); | |
| 1329 return end; | |
| 1330 } | |
| 1331 int char = utf8[position]; | |
| 1332 if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) { | |
| 1333 if (allowMalformed) { | |
| 1334 addCharCode(0xFFFD); | |
| 1335 return position; | |
| 1336 } | |
| 1337 throw new FormatException("Expected UTF-8 continuation byte, " | |
| 1338 "found $char", utf8, position); | |
| 1339 } | |
| 1340 value = 64 * value + (char & MASK_CONTINUE_VALUE); | |
| 1341 position++; | |
| 1342 } while (position < codeEnd); | |
| 1343 if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) { | |
| 1344 // Over-long encoding. | |
| 1345 if (allowMalformed) { | |
| 1346 value = 0xFFFD; | |
| 1347 } else { | |
| 1348 throw new FormatException( | |
| 1349 "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}" | |
| 1350 " encoded in ${size + 1} bytes.", utf8, position - 1); | |
| 1351 } | |
| 1352 } | |
| 1353 addCharCode(value); | |
| 1354 return position; | |
| 1355 } | |
| 1356 | |
| 1357 void addCharCode(int char) { | |
| 1358 assert(char >= 0); | |
| 1359 assert(char <= MAX_UNICODE); | |
| 1360 if (partialState != NO_PARTIAL) { | |
| 1361 if (allowMalformed) { | |
| 1362 partialState = NO_PARTIAL; | |
| 1363 addCharCode(0xFFFD); | |
| 1364 } else { | |
| 1365 throw new FormatException("Incomplete UTF-8 sequence", utf8); | |
| 1366 } | |
| 1367 } | |
| 1368 if (isLatin1 && char > 0xff) { | |
| 1369 _to16Bit(); // Also grows a little if close to full. | |
| 1370 } | |
| 1371 int length = this.length; | |
| 1372 if (char <= MAX_THREE_BYTE) { | |
| 1373 if (length == buffer.length) _grow(); | |
| 1374 buffer[length] = char; | |
| 1375 this.length = length + 1; | |
| 1376 return; | |
| 1377 } | |
| 1378 if (length + 2 > buffer.length) _grow(); | |
| 1379 int bits = char - 0x10000; | |
| 1380 buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE); | |
| 1381 buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE); | |
| 1382 this.length = length + 2; | |
| 1383 } | |
| 1384 | |
| /** | |
| * Replaces [buffer] with a [Uint16List] holding the same first [length] | |
| * entries and clears [isLatin1]. | |
| * | |
| * The new buffer keeps the old capacity unless that would leave fewer than | |
| * [INITIAL_CAPACITY] free entries, in which case it is enlarged to | |
| * `length + INITIAL_CAPACITY`. | |
| */ | |
| 1385 void _to16Bit() { | |
| 1386 assert(isLatin1); | |
| int minCapacity = length + INITIAL_CAPACITY; | |
| int wideCapacity = | |
| (buffer.length < minCapacity) ? minCapacity : buffer.length; | |
| // The right-hand side is evaluated (and filled from the old buffer) | |
| // before the assignment takes place. | |
| buffer = new Uint16List(wideCapacity)..setRange(0, length, buffer, 0); | |
| 1394 isLatin1 = false; | |
| 1395 } | |
| 1396 | |
| /** | |
| * Doubles the capacity of [buffer], copying over its first [length] entries. | |
| * | |
| * The replacement is a [Uint8List] while [isLatin1] is set and a | |
| * [Uint16List] otherwise. | |
| */ | |
| 1397 void _grow() { | |
| int doubled = buffer.length * 2; | |
| List bigger = isLatin1 ? new Uint8List(doubled) : new Uint16List(doubled); | |
| bigger.setRange(0, length, buffer); | |
| buffer = bigger; | |
| 1407 } | |
| 1408 | |
| /** | |
| * Decodes the UTF-8 bytes `utf8[position..end)` and appends the result. | |
| * | |
| * If an earlier slice ended inside a multi-byte sequence, the state saved in | |
| * [partialState] is unpacked and the sequence is continued first. ASCII | |
| * bytes are then copied directly; lead bytes are handed to | |
| * [addContinuation]. Stray continuation bytes and bytes of 0xF8 or above | |
| * become U+FFFD when [allowMalformed] is set and throw a [FormatException] | |
| * otherwise. | |
| */ | |
| 1409 void addSlice(List<int> utf8, int position, int end) { | |
| 1410 assert(position < end); | |
| 1411 if (partialState > 0) { | |
| // Unpack the pending sequence: total continuation bytes, how many are | |
| // still missing, and the value bits collected so far. | |
| 1412 int continueByteCount = (partialState & MASK_TWO_BIT); | |
| 1413 int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT; | |
| 1414 int value = partialState >> SHIFT_VALUE; | |
| 1415 partialState = NO_PARTIAL; | |
| 1416 position = addContinuation(utf8, position, end, | |
| 1417 continueByteCount, missing, value); | |
| 1418 if (position == end) return; | |
| 1419 } | |
| // The ASCII fast path tracks the write position and capacity in locals; | |
| // they are synchronized with the fields around every slow-path call. | |
| 1420 int index = length; | |
| 1421 int capacity = buffer.length; | |
| 1422 while (position < end) { | |
| 1423 int char = utf8[position]; | |
| 1424 if (char <= MAX_ASCII) { | |
| 1425 if (index == capacity) { | |
| // _grow copies only the first [length] entries, so publish the local | |
| // write position before growing and re-read the capacity afterwards. | |
| length = index; | |
| _grow(); | |
| capacity = buffer.length; | |
| } | |
| 1426 buffer[index++] = char; | |
| 1427 position++; | |
| 1428 continue; | |
| 1429 } | |
| 1430 length = index; | |
| 1431 if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) { | |
| 1432 if (allowMalformed) { | |
| 1433 addCharCode(0xFFFD); | |
| 1434 position++; | |
| 1435 } else { | |
| 1436 throw new FormatException("Unexpected UTF-8 continuation byte", | |
| 1437 utf8, position); | |
| 1438 } | |
| 1439 } else if (char < 0xE0) { // C0-DF | |
| 1440 // Two-byte. | |
| 1441 position = addContinuation(utf8, position + 1, end, 1, 1, | |
| 1442 char & MASK_TWO_BYTE); | |
| 1443 } else if (char < 0xF0) { // E0-EF | |
| 1444 // Three-byte. | |
| 1445 position = addContinuation(utf8, position + 1, end, 2, 2, | |
| 1446 char & MASK_THREE_BYTE); | |
| 1447 } else if (char < 0xF8) { // F0-F7 | |
| 1448 // Four-byte. | |
| 1449 position = addContinuation(utf8, position + 1, end, 3, 3, | |
| 1450 char & MASK_FOUR_BYTE); | |
| 1451 } else { | |
| 1452 if (allowMalformed) { | |
| 1453 addCharCode(0xFFFD); | |
| 1454 position++; | |
| 1455 } else { | |
| 1456 throw new FormatException("Invalid UTF-8 byte: $char", | |
| 1457 utf8, position); | |
| 1458 } | |
| 1459 } | |
| 1460 index = length; | |
| // The calls above may have replaced [buffer] (addCharCode can grow or | |
| // widen it), so the cached capacity must be refreshed as well. | |
| capacity = buffer.length; | |
| 1461 } | |
| 1462 length = index; | |
| 1463 } | |
| 1464 | |
| /** | |
| * Returns the first [length] entries of [buffer] as a [String]. | |
| * | |
| * If a UTF-8 sequence is still incomplete, U+FFFD is appended first when | |
| * [allowMalformed] is set. Otherwise the bytes seen so far are rebuilt from | |
| * [partialState] and reported as the source of a [FormatException]. | |
| */ | |
| 1465 String toString() { | |
| if (partialState == NO_PARTIAL) { | |
| return new String.fromCharCodes(buffer, 0, length); | |
| } | |
| if (allowMalformed) { | |
| partialState = NO_PARTIAL; | |
| addCharCode(0xFFFD); | |
| return new String.fromCharCodes(buffer, 0, length); | |
| } | |
| int continueByteCount = (partialState & MASK_TWO_BIT); | |
| int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT; | |
| int value = partialState >> SHIFT_VALUE; | |
| int seenByteCount = continueByteCount - missing + 1; | |
| List source = new Uint8List(seenByteCount); | |
| // Rebuild the continuation bytes that were consumed, last one first. | |
| for (int i = seenByteCount - 1; i >= 1; i--) { | |
| source[i] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE); | |
| value >>= 6; | |
| } | |
| // Lead byte: remaining value bits plus the tag for the sequence length | |
| // (0x3c0 shifted right yields 0xC0, 0xE0 or 0xF0 once stored as a byte). | |
| source[0] = value | (0x3c0 >> (continueByteCount - 1)); | |
| throw new FormatException("Incomplete UTF-8 sequence", | |
| source, source.length); | |
| 1487 } | |
| 1488 } | |
| 1489 | |
| 1490 /** | |
| 1491 * Chunked JSON parser that parses UTF-8 chunks. | |
| * | |
| * The current input is the byte list [chunk], of which positions below | |
| * [chunkEnd] are parsed. String values are decoded through a | |
| * [_Utf8StringBuffer] created with the [allowMalformed] setting. | |
| 1492 */ | |
| 1493 class _JsonUtf8Parser extends _ChunkedJsonParser { | |
| // Passed to every [_Utf8StringBuffer] this parser creates. | |
| 1494 final bool allowMalformed; | |
| // The UTF-8 bytes currently being parsed and the end position within them. | |
| 1495 List<int> chunk; | |
| 1496 int chunkEnd; | |
| 1497 | |
| 1498 _JsonUtf8Parser(_JsonListener listener, this.allowMalformed) | |
| 1499 : super(listener); | |
| 1500 | |
| // Returns the byte at [position] of the current chunk. | |
| 1501 int getChar(int position) => chunk[position]; | |
| 1502 | |
| // Decodes the bytes `chunk[start..end)` into a string. | |
| 1503 String getString(int start, int end) { | |
| 1504 beginString(); | |
| 1505 addSliceToString(start, end); | |
| 1506 String result = endString(); | |
| 1507 return result; | |
| 1508 } | |
| 1509 | |
| // Starts collecting a new string value in a fresh buffer. | |
| 1510 void beginString() { | |
| 1511 this.buffer = new _Utf8StringBuffer(allowMalformed); | |
| 1512 } | |
| 1513 | |
| // Appends the decoded bytes `chunk[start..end)` to the current string. | |
| 1514 void addSliceToString(int start, int end) { | |
| 1515 _Utf8StringBuffer buffer = this.buffer; | |
| 1516 buffer.addSlice(chunk, start, end); | |
| 1517 } | |
| 1518 | |
| // Appends a single character code to the current string. | |
| 1519 void addCharToString(int charCode) { | |
| 1520 _Utf8StringBuffer buffer = this.buffer; | |
| 1521 buffer.addCharCode(charCode); | |
| 1522 } | |
| 1523 | |
| // Detaches the current buffer from the parser and returns its contents. | |
| 1524 String endString() { | |
| 1525 _Utf8StringBuffer buffer = this.buffer; | |
| 1526 this.buffer = null; | |
| 1527 return buffer.toString(); | |
| 1528 } | |
| 1529 | |
| // Copies the bytes `chunk[start..end)` into [target] starting at [offset]. | |
| 1530 void copyCharsToList(int start, int end, List target, int offset) { | |
| 1531 int length = end - start; | |
| 1532 target.setRange(offset, offset + length, chunk, start); | |
| 1533 } | |
| 1534 | |
| // Decodes `chunk[start..end)` to a string and parses all of it as a double. | |
| 1535 double parseDouble(int start, int end) { | |
| 1536 String string = getString(start, end); | |
| 1537 return _parseDouble(string, 0, string.length); | |
| 1538 } | |
| 1539 } | |
| 1540 | |
| // Native binding to the "Double_parse" entry. | |
| // NOTE(review): presumably parses the characters of [source] from [start] | |
| // up to (not including) [end] - confirm against the native implementation. | |
| 1541 double _parseDouble(String source, int start, int end) | |
| 1542 native "Double_parse"; | |
| 1543 | |
| 1544 /** | |
| 1545 * Implements the chunked conversion from a UTF-8 encoding of JSON | |
| 1546 * to its corresponding object. | |
| * | |
| * Byte chunks are fed to a [_JsonUtf8Parser]; when the sink is closed, the | |
| * parser's result is added to the output sink, which is then closed too. | |
| 1547 */ | |
| 1548 class _JsonUtf8DecoderSink extends ByteConversionSinkBase { | |
| // Typed as the concrete parser: `chunk` and `chunkEnd` are declared on | |
| // [_JsonUtf8Parser], which is also what [_createParser] instantiates. | |
| 1549 _JsonUtf8Parser _parser; | |
| // Receives the single decoded object. | |
| 1550 final Sink<Object> _sink; | |
| 1551 | |
| 1552 _JsonUtf8DecoderSink(reviver, this._sink, bool allowMalformed) | |
| 1553 : _parser = _createParser(reviver, allowMalformed); | |
| 1554 | |
| // Builds the parser with a plain listener, or a reviving listener when | |
| // [reviver] is given. | |
| 1555 static _JsonUtf8Parser _createParser(reviver, bool allowMalformed) { | |
| 1556 _BuildJsonListener listener; | |
| 1557 if (reviver == null) { | |
| 1558 listener = new _BuildJsonListener(); | |
| 1559 } else { | |
| 1560 listener = new _ReviverJsonListener(reviver); | |
| 1561 } | |
| 1562 return new _JsonUtf8Parser(listener, allowMalformed); | |
| 1563 } | |
| 1564 | |
| // Parses `chunk[start..end)`. When [isLast] is true the sink is closed as | |
| // well, so the decoded value is delivered to the output sink. | |
| 1565 void addSlice(List<int> chunk, int start, int end, bool isLast) { | |
| 1566 _parser.chunk = chunk; | |
| 1567 _parser.chunkEnd = end; | |
| 1568 _parser.parse(start); | |
| 1569 if (isLast) close(); | |
| 1570 } | |
| 1571 | |
| // Parses a complete chunk without closing the sink. | |
| 1572 void add(List<int> chunk) { | |
| 1573 addSlice(chunk, 0, chunk.length, false); | |
| 1574 } | |
| 1575 | |
| // Finishes parsing, forwards the result to the output sink and closes it. | |
| 1576 void close() { | |
| 1577 _parser.close(); | |
| 1578 var decoded = _parser.result; | |
| 1579 _sink.add(decoded); | |
| 1580 _sink.close(); | |
| 1581 } | |
| 1582 } | |
| OLD | NEW |