OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
| 5 import "dart:_internal" show POWERS_OF_TEN; |
| 6 |
5 // JSON conversion. | 7 // JSON conversion. |
6 | 8 |
/**
 * Parses [json] in one go and returns the value built by the listener.
 *
 * When [reviver] is null a plain [_BuildJsonListener] is used, otherwise a
 * [_ReviverJsonListener] wrapping [reviver]. The whole string is handed to
 * the parser as a single chunk, parsed from index 0, and then closed so
 * that incomplete input is rejected.
 */
patch _parseJson(String json, reviver(var key, var value)) {
  _BuildJsonListener listener = (reviver == null)
      ? new _BuildJsonListener()
      : new _ReviverJsonListener(reviver);
  var parser = new _JsonStringParser(listener)
    ..chunk = json
    ..chunkEnd = json.length;
  parser.parse(0);
  parser.close();
  return listener.result;
}
17 | 23 |
18 //// Implementation /////////////////////////////////////////////////////////// | 24 //// Implementation /////////////////////////////////////////////////////////// |
19 | 25 |
20 // Simple API for JSON parsing. | 26 // Simple API for JSON parsing. |
21 | 27 |
/**
 * Listener for parsing events from [_ChunkedJsonParser].
 *
 * Every callback has an empty default body, so subclasses only override
 * the events they care about.
 */
abstract class _JsonListener {
  // Primitive values. NOTE(review): handleString/handleNumber are presumably
  // invoked by the string and number parsers, which are not visible here.
  void handleString(String value) {}
  void handleNumber(num value) {}
  void handleBool(bool value) {}
  void handleNull() {}

  // Object events: '{' starts an object, ':' reports a property name,
  // ',' or '}' after a value reports a property value, '}' ends the object.
  void beginObject() {}
  void propertyName() {}
  void propertyValue() {}
  void endObject() {}

  // Array events: '[' starts an array, ',' or ']' after a value reports an
  // element, ']' ends the array.
  void beginArray() {}
  void arrayElement() {}
  void endArray() {}
}
35 | 44 |
36 /** | 45 /** |
37 * A [JsonListener] that builds data objects from the parser events. | 46 * A [_JsonListener] that builds data objects from the parser events. |
38 * | 47 * |
39 * This is a simple stack-based object builder. It keeps the most recently | 48 * This is a simple stack-based object builder. It keeps the most recently |
40 * seen value in a variable, and uses it depending on the following event. | 49 * seen value in a variable, and uses it depending on the following event. |
41 */ | 50 */ |
42 class _BuildJsonListener extends _JsonListener { | 51 class _BuildJsonListener extends _JsonListener { |
43 /** | 52 /** |
44 * Stack used to handle nested containers. | 53 * Stack used to handle nested containers. |
45 * | 54 * |
46 * The current container is pushed on the stack when a new one is | 55 * The current container is pushed on the stack when a new one is |
47 * started. If the container is a [Map], there is also a current [key] | 56 * started. If the container is a [Map], there is also a current [key] |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
void propertyValue() {
  // Pass the current key/value pair through the reviver first, then let the
  // superclass handle the (possibly replaced) value.
  var revived = reviver(key, value);
  value = revived;
  super.propertyValue();
}
132 | 141 |
// The top-level value is revived with a null key.
get result => reviver(null, value);
136 } | 145 } |
137 | 146 |
/**
 * Buffer holding parts of a numeral.
 *
 * The buffer contains the characters of a JSON number.
 * These are all ASCII, so an [Uint8List] is used as backing store.
 *
 * This buffer is used when a JSON number is split between separate chunks.
 */
class _NumberBuffer {
  /** Smallest backing store ever allocated by the constructor. */
  static const int minCapacity = 16;

  /** Default guess for the size of the second part of a split number. */
  static const int kDefaultOverhead = 5;

  /** Backing store; only the first [length] bytes are meaningful. */
  Uint8List list;

  /** Number of bytes of [list] currently in use. */
  int length = 0;

  _NumberBuffer(int initialCapacity)
      : list = new Uint8List(_initialCapacity(initialCapacity));

  int get capacity => list.length;

  // Pick an initial capacity greater than the first part's size.
  // The typical use case has two parts, this is the attempt at
  // guessing the size of the second part without overdoing it.
  // The default estimate of the second part is [kDefaultOverhead],
  // then round up to a multiple of four, and return the result,
  // or [minCapacity] if that is greater.
  //
  // The parameter must not be named `minCapacity`: that would shadow the
  // static constant and turn the lower-bound check into a no-op.
  static int _initialCapacity(int firstPartLength) {
    int capacity = firstPartLength + kDefaultOverhead;
    if (capacity < minCapacity) return minCapacity;
    return (capacity + 3) & ~3; // Round up to a multiple of four.
  }

  // Grows to the exact size asked for; never shrinks.
  void ensureCapacity(int newCapacity) {
    Uint8List list = this.list;
    if (newCapacity <= list.length) return;
    Uint8List newList = new Uint8List(newCapacity);
    newList.setRange(0, list.length, list, 0);
    this.list = newList;
  }

  // Returns the first [length] bytes as a string of ASCII characters.
  String getString() {
    var list = this.list;
    if (length < list.length) {
      // Only expose the used prefix of the backing store.
      list = new Uint8List.view(list.buffer, 0, length);
    }
    String result = new String.fromCharCodes(list);
    return result;
  }

  // TODO(lrn): See if parsing of numbers can be abstracted to something
  // not only working on strings, but also on char-code lists, without losing
  // performance.
  int parseInt() => int.parse(getString());
  double parseDouble() => double.parse(getString());
}
| 203 |
| 204 /** |
| 205 * Chunked JSON parser. |
| 206 * |
| 207 * Receives inputs in chunks, gives access to individual parts of the input, |
| 208 * and stores input state between chunks. |
| 209 * |
| 210 * Implementations include [String] and UTF-8 parsers. |
| 211 */ |
| 212 abstract class _ChunkedJsonParser { |
139 // A simple non-recursive state-based parser for JSON. | 213 // A simple non-recursive state-based parser for JSON. |
140 // | 214 // |
141 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON | 215 // Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON |
142 // and strings also in OBJECT_EMPTY, OBJECT_COMMA. | 216 // and strings also in OBJECT_EMPTY, OBJECT_COMMA. |
143 // VALUE STRING : , } ] Transitions to | 217 // VALUE STRING : , } ] Transitions to |
144 // EMPTY X X -> END | 218 // EMPTY X X -> END |
145 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop | 219 // ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop |
146 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop | 220 // ARRAY_VALUE @ @ -> ARRAY_COMMA / pop |
147 // ARRAY_COMMA X X -> ARRAY_VALUE | 221 // ARRAY_COMMA X X -> ARRAY_VALUE |
148 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop | 222 // OBJECT_EMPTY X @ -> OBJECT_KEY / pop |
(...skipping 16 matching lines...) Expand all Loading... |
165 static const int INSIDE_OBJECT = 2; | 239 static const int INSIDE_OBJECT = 2; |
166 static const int AFTER_COLON = 3; // Always inside object. | 240 static const int AFTER_COLON = 3; // Always inside object. |
167 | 241 |
168 static const int ALLOW_STRING_MASK = 8; // Allowed if zero. | 242 static const int ALLOW_STRING_MASK = 8; // Allowed if zero. |
169 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero. | 243 static const int ALLOW_VALUE_MASK = 4; // Allowed if zero. |
170 static const int ALLOW_VALUE = 0; | 244 static const int ALLOW_VALUE = 0; |
171 static const int STRING_ONLY = 4; | 245 static const int STRING_ONLY = 4; |
172 static const int NO_VALUES = 12; | 246 static const int NO_VALUES = 12; |
173 | 247 |
174 // Objects and arrays are "empty" until their first property/element. | 248 // Objects and arrays are "empty" until their first property/element. |
| 249 // At this position, they may either have an entry or a close-bracket. |
175 static const int EMPTY = 0; | 250 static const int EMPTY = 0; |
176 static const int NON_EMPTY = 16; | 251 static const int NON_EMPTY = 16; |
177 static const int EMPTY_MASK = 16; // Empty if zero. | 252 static const int EMPTY_MASK = 16; // Empty if zero. |
178 | 253 |
179 | |
180 static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY; | 254 static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY; |
181 | 255 |
182 // Actual states. | 256 // Actual states. |
183 static const int STATE_INITIAL = EMPTY | ALLOW_VALUE; | 257 static const int STATE_INITIAL = EMPTY | ALLOW_VALUE; |
184 static const int STATE_END = NON_EMPTY | NO_VALUES; | 258 static const int STATE_END = NON_EMPTY | NO_VALUES; |
185 | 259 |
186 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY | EMPTY | ALLOW_VALUE; | 260 static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY | EMPTY | ALLOW_VALUE; |
187 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY | NON_EMPTY | NO_VALUES; | 261 static const int STATE_ARRAY_VALUE = INSIDE_ARRAY | NON_EMPTY | NO_VALUES; |
188 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY | NON_EMPTY | ALLOW_VALUE; | 262 static const int STATE_ARRAY_COMMA = INSIDE_ARRAY | NON_EMPTY | ALLOW_VALUE; |
189 | 263 |
(...skipping 29 matching lines...) Expand all Loading... |
219 static const int CHAR_f = 0x66; | 293 static const int CHAR_f = 0x66; |
220 static const int CHAR_l = 0x6c; | 294 static const int CHAR_l = 0x6c; |
221 static const int CHAR_n = 0x6e; | 295 static const int CHAR_n = 0x6e; |
222 static const int CHAR_r = 0x72; | 296 static const int CHAR_r = 0x72; |
223 static const int CHAR_s = 0x73; | 297 static const int CHAR_s = 0x73; |
224 static const int CHAR_t = 0x74; | 298 static const int CHAR_t = 0x74; |
225 static const int CHAR_u = 0x75; | 299 static const int CHAR_u = 0x75; |
226 static const int LBRACE = 0x7b; | 300 static const int LBRACE = 0x7b; |
227 static const int RBRACE = 0x7d; | 301 static const int RBRACE = 0x7d; |
228 | 302 |
229 final String source; | 303 // State of partial value at chunk split. |
| 304 static const int NO_PARTIAL = 0; |
| 305 static const int PARTIAL_STRING = 1; |
| 306 static const int PARTIAL_NUMERAL = 2; |
| 307 static const int PARTIAL_KEYWORD = 3; |
| 308 static const int MASK_PARTIAL = 3; |
| 309 |
| 310 // Partial states for numerals. Values can be |'ed with PARTIAL_NUMERAL. |
| 311 static const int NUM_SIGN = 0; // After initial '-'. |
| 312 static const int NUM_ZERO = 4; // After '0' as first digit. |
| 313 static const int NUM_DIGIT = 8; // After digit, no '.' or 'e' seen. |
| 314 static const int NUM_DOT = 12; // After '.'. |
| 315 static const int NUM_DOT_DIGIT = 16; // After a decimal digit (after '.'). |
| 316 static const int NUM_E = 20; // After 'e' or 'E'. |
| 317 static const int NUM_E_SIGN = 24; // After '-' or '+' after 'e' or 'E'. |
| 318 static const int NUM_E_DIGIT = 28; // After exponent digit. |
| 319 static const int NUM_SUCCESS = 32; // Never stored as partial state. |
| 320 |
| 321 // Partial states for strings. |
| 322 static const int STR_PLAIN = 0; // Inside string, but not escape. |
| 323 static const int STR_ESCAPE = 4; // After '\'. |
| 324 static const int STR_U = 16; // After '\u' and 0-3 hex digits. |
| 325 static const int STR_U_COUNT_SHIFT = 2; // Hex digit count in bits 2-3. |
| 326 static const int STR_U_VALUE_SHIFT = 5; // Hex digit value in bits 5+. |
| 327 |
| 328 // Partial states for keywords. |
| 329 static const int KWD_TYPE_MASK = 12; |
| 330 static const int KWD_TYPE_SHIFT = 2; |
| 331 static const int KWD_NULL = 0; // Prefix of "null" seen. |
| 332 static const int KWD_TRUE = 4; // Prefix of "true" seen. |
| 333 static const int KWD_FALSE = 8; // Prefix of "false" seen. |
| 334 static const int KWD_COUNT_SHIFT = 4; // Prefix length in bits 4+. |
| 335 |
| 336 // Mask used to mask off two lower bits. |
| 337 static const int TWO_BIT_MASK = 3; |
| 338 |
230 final _JsonListener listener; | 339 final _JsonListener listener; |
231 _JsonParser(this.source, this.listener); | 340 |
232 | 341 // The current parsing state. |
233 /** Parses [source], or throws if it fails. */ | 342 int state = STATE_INITIAL; |
234 void parse() { | 343 List<int> states = <int>[]; |
235 final List<int> states = <int>[]; | 344 |
236 int state = STATE_INITIAL; | 345 /** |
237 int position = 0; | 346 * Stores tokenizer state between chunks. |
238 int length = source.length; | 347 * |
| 348 * This state is stored when a chunk stops in the middle of a |
| 349 * token (string, numeral, boolean or null). |
| 350 * |
| 351 * The partial state is used to continue parsing on the next chunk. |
| 352 * The previous chunk is not retained, any data needed are stored in |
| 353 * this integer, or in the [buffer] field as a string-building buffer |
| 354 * or a [_NumberBuffer]. |
| 355 * |
| 356 * Prefix state stored in [partialState] as bits. |
| 357 * |
| 358 * ..00 : No partial value (NO_PARTIAL). |
| 359 * |
| 360 * ..00001 : Partial string, not inside escape. |
| 361 * ..00101 : Partial string, after '\'. |
| 362 * ..vvvv1dd01 : Partial \u escape. |
| 363 * The 'dd' bits (2-3) encode the number of hex digits seen. |
| 364 * Bits 5-16 encode the value of the hex digits seen so far. |
| 365 * |
| 366 * ..0ddd10 : Partial numeral. |
| 367 * The `ddd` bits store the parts of the numeral seen so |
| 368 * far, as the constants `NUM_*` defined above. |
| 369 * The characters of the numeral are stored in [buffer] |
| 370 * as a [_NumberBuffer]. |
| 371 * |
| 372 * ..0ddd0011 : Partial 'null' keyword. |
| 373 * ..0ddd0111 : Partial 'true' keyword. |
| 374 * ..0ddd1011 : Partial 'false' keyword. |
| 375 * For all three keywords, the `ddd` bits encode the number |
| 376 * of letters seen. |
| 377 */ |
| 378 int partialState = NO_PARTIAL; |
| 379 |
| 380 /** |
| 381 * Extra data stored while parsing a primitive value. |
| 382 * May be set during parsing, always set at chunk end if a value is partial. |
| 383 * |
| 384 * May contain a string buffer while parsing strings. |
| 385 */ |
| 386 var buffer = null; |
| 387 |
| 388 _ChunkedJsonParser(this.listener); |
| 389 |
/**
 * Pushes [state] onto the [states] stack.
 *
 * Called when a new array or object literal starts, so that the enclosing
 * state can be recovered with [restoreState] when the literal ends.
 */
void saveState(int state) => states.add(state);
| 399 |
/**
 * Pops and returns the state most recently pushed with [saveState].
 *
 * Throws if the stack is empty.
 */
int restoreState() => states.removeLast();
| 406 |
/**
 * Finalizes the parsing.
 *
 * Fails unless the input seen so far forms one complete value: the parser
 * must have reached [STATE_END], and any pending partial token must be
 * completable.
 *
 * A pending numeral is the only partial token that can be completed; it is
 * handed to [finishChunkNumber] with an empty slice so only the buffered
 * characters are used. A pending string or keyword is always an error.
 */
void close() {
  final int pending = partialState;
  if (pending != NO_PARTIAL) {
    final int kind = pending & MASK_PARTIAL;
    if (kind == PARTIAL_STRING) {
      fail(chunkEnd, "Unterminated string");
    } else if (kind == PARTIAL_NUMERAL) {
      // A partial number might be a valid number if we know it's done.
      // This costs a little extra when the input is a single number,
      // which is assumed to be rare.
      _NumberBuffer pendingDigits = this.buffer;
      this.buffer = null;
      finishChunkNumber(pending & ~MASK_PARTIAL, 0, 0, pendingDigits);
    } else {
      assert(kind == PARTIAL_KEYWORD);
      fail(chunkEnd); // Incomplete literal.
    }
  }
  if (state != STATE_END) fail(chunkEnd);
}
| 442 |
/**
 * The value produced by the listener.
 *
 * Intended to be read after the parser has been successfully closed with
 * [close].
 */
Object get result => listener.result;
| 452 |
| 453 /** Sets the current source chunk. */ |
| 454 void set chunk(var source); |
| 455 |
| 456 /** |
| 457 * Length of current chunk. |
| 458 * |
| 459 * The valid arguments to [getChar] are 0 .. `chunkEnd - 1`. |
| 460 */ |
| 461 int get chunkEnd; |
| 462 |
| 463 /** |
| 464 * Returns the chunk itself. |
| 465 * |
| 466 * Only used by [fail] to include the chunk in the thrown [FormatException]. |
| 467 */ |
| 468 get chunk; |
| 469 |
| 470 /** |
| 471 * Get character/code unit of current chunk. |
| 472 * |
| 473 * The [index] must be non-negative and less than `chunkEnd`. |
| 474 * In practice, [index] will be no smaller than the `start` argument passed |
| 475 * to [parse]. |
| 476 */ |
| 477 int getChar(int index); |
| 478 |
| 479 /** |
| 480 * Copy ASCII characters from start to end of chunk into a list. |
| 481 * |
| 482 * Used for number buffer (always copies ASCII, so encoding is not important). |
| 483 */ |
| 484 void copyCharsToList(int start, int end, List<int> target); |
| 485 |
| 486 /** |
| 487 * Build a string using input code units. |
| 488 * |
| 489 * Creates a string buffer and enables adding characters and slices |
| 490 * to that buffer. |
| 491 * The buffer is stored in the [buffer] field. If the string is unterminated, |
| 492 * the same buffer is used to continue parsing in the next chunk. |
| 493 */ |
| 494 void beginString(); |
| 495 /** |
| 496 * Add single character code to string being built. |
| 497 * |
| 498 * Used for unparsed escape sequences. |
| 499 */ |
| 500 void addCharToString(int charCode); |
| 501 |
| 502 /** |
| 503 * Adds slice of current chunk to string being built. |
| 504 * |
| 505 * The [start] position is inclusive, [end] is exclusive. |
| 506 */ |
| 507 void addSliceToString(int start, int end); |
| 508 |
| 509 /** Finalizes the string being built and returns it as a String. */ |
| 510 String endString(); |
| 511 |
| 512 /** |
| 513 * Extracts a literal string from a slice of the current chunk. |
| 514 * |
| 515 * No interpretation of the content is performed, except for converting |
| 516 * the source format to string. |
| 517 * This can be implemented more or less efficiently depending on the |
| 518 * underlying source. |
| 519 * |
| 520 * This is used for string literals that contain no escapes. |
| 521 */ |
| 522 String getString(int start, int end); |
| 523 |
/**
 * Parses the slice [start]..[end] of the current chunk as an integer.
 *
 * The slice is first converted with [getString]; its format is expected
 * to be correct already.
 */
int parseInt(int start, int end) {
  String digits = getString(start, end);
  return int.parse(digits);
}
| 532 |
/**
 * Parses the slice [start]..[end] of the current chunk as a double.
 *
 * The slice is first converted with [getString]; its format is expected
 * to be correct already. Used as the fallback when the double value
 * cannot be built exactly while scanning.
 */
double parseDouble(int start, int end) {
  String literal = getString(start, end);
  return double.parse(literal);
}
| 543 |
/**
 * Creates a [_NumberBuffer] containing the characters from [start] to
 * [chunkEnd].
 *
 * The buffer is initialized with the part of the number literal that ends
 * the current chunk, and its `length` is set to the number of characters
 * copied. [start] must be a valid index into the current chunk.
 *
 * Returns the new buffer. The return type is `_NumberBuffer` rather than
 * `void`, because the method returns a value.
 */
_NumberBuffer createNumberBuffer(int start) {
  assert(start >= 0);
  assert(start < chunkEnd);
  int length = chunkEnd - start;
  var buffer = new _NumberBuffer(length);
  copyCharsToList(start, chunkEnd, buffer.list);
  buffer.length = length;
  return buffer;
}
| 559 |
/**
 * Continues parsing a partial value left over from the previous chunk.
 *
 * Does nothing for an empty remainder ([position] equal to [chunkEnd]).
 * Otherwise the stored [partialState] is cleared, split into its type
 * (low two bits) and its type-specific detail bits, and dispatched to the
 * matching continuation parser. Returns the position after the consumed
 * input.
 */
int parsePartial(int position) {
  if (position == chunkEnd) return position;
  final int saved = this.partialState;
  assert(saved != NO_PARTIAL);
  final int kind = saved & MASK_PARTIAL;
  final int detail = saved & ~MASK_PARTIAL;
  // Cleared up front; the continuation parsers set it again if the token
  // is still incomplete at the end of this chunk.
  this.partialState = NO_PARTIAL;
  assert(kind != 0);
  if (kind == PARTIAL_STRING) return parsePartialString(position, detail);
  if (kind == PARTIAL_NUMERAL) return parsePartialNumber(position, detail);
  if (kind == PARTIAL_KEYWORD) return parsePartialKeyword(position, detail);
  return position;
}
| 580 |
/**
 * Parses the remainder of a number whose start is held in the number buffer.
 *
 * Syntax is checked while parsing. [state] is one of the `NUM_*` constants
 * describing how much of the numeral was seen before the chunk split.
 * Scanning starts at [position] and returns the index of the first
 * character that is not part of the number, or [chunkEnd] if the whole
 * chunk is a valid continuation (in which case the number stays partial).
 * Fails if a syntax error is detected.
 */
int parsePartialNumber(int position, int state) {
  final int start = position;
  // Primitive implementation, can be optimized.
  _NumberBuffer digits = this.buffer;
  this.buffer = null;
  final int end = chunkEnd;
  toBailout: {
    if (position == end) break toBailout;
    int char = getChar(position);
    // For '0'..'9' this is the digit value; anything else is > 9.
    int digit = char ^ CHAR_0;
    if (state == NUM_SIGN) {
      // A digit must follow the leading '-'.
      if (digit > 9) return fail(position);
      state = (digit == 0) ? NUM_ZERO : NUM_DIGIT;
      position++;
      if (position == end) break toBailout;
      char = getChar(position);
      digit = char ^ CHAR_0;
    }
    if (state == NUM_ZERO) {
      // JSON does not allow insignificant leading zeros (e.g., "09").
      if (digit <= 9) return fail(position);
      state = NUM_DIGIT;
    }
    // Integer part.
    while (state == NUM_DIGIT) {
      if (digit > 9) {
        if (char == DECIMALPOINT) {
          state = NUM_DOT;
        } else if ((char | 0x20) == CHAR_e) {
          state = NUM_E;
        } else {
          finishChunkNumber(state, start, position, digits);
          return position;
        }
      }
      position++;
      if (position == end) break toBailout;
      char = getChar(position);
      digit = char ^ CHAR_0;
    }
    if (state == NUM_DOT) {
      // At least one digit is required after the decimal point.
      if (digit > 9) return fail(position);
      state = NUM_DOT_DIGIT;
    }
    // Fraction part.
    while (state == NUM_DOT_DIGIT) {
      if (digit > 9) {
        if ((char | 0x20) == CHAR_e) {
          state = NUM_E;
        } else {
          finishChunkNumber(state, start, position, digits);
          return position;
        }
      }
      position++;
      if (position == end) break toBailout;
      char = getChar(position);
      digit = char ^ CHAR_0;
    }
    // Optional exponent sign.
    if (state == NUM_E && (char == PLUS || char == MINUS)) {
      state = NUM_E_SIGN;
      position++;
      if (position == end) break toBailout;
      char = getChar(position);
      digit = char ^ CHAR_0;
    }
    assert(state >= NUM_E);
    // Exponent digits.
    while (digit <= 9) {
      state = NUM_E_DIGIT;
      position++;
      if (position == end) break toBailout;
      char = getChar(position);
      digit = char ^ CHAR_0;
    }
    finishChunkNumber(state, start, position, digits);
    return position;
  }
  // Bailout code in case the current chunk ends while parsing the numeral.
  assert(position == end);
  continueChunkNumber(state, start, digits);
  return chunkEnd;
}
| 679 |
/**
 * Continues parsing a partial string literal.
 *
 * Finishes any escape sequence that was cut by the chunk split and then
 * hands the rest of the string to [parseStringToBuffer]. [partialState]
 * holds the `STR_*` detail bits with the partial-type bits masked off.
 */
int parsePartialString(int position, int partialState) {
  if (partialState == STR_PLAIN) return parseStringToBuffer(position);
  if (partialState == STR_ESCAPE) {
    int next = parseStringEscape(position);
    // parseStringEscape sets partialState if it sees the end.
    return (next == chunkEnd) ? next : parseStringToBuffer(next);
  }
  assert((partialState & STR_U) != 0);
  // Resume a "\u" escape: recover the hex digits already consumed.
  int codeUnit = partialState >> STR_U_VALUE_SHIFT;
  int seen = (partialState >> STR_U_COUNT_SHIFT) & TWO_BIT_MASK;
  while (seen < 4) {
    if (position == chunkEnd) return chunkStringEscapeU(seen, codeUnit);
    int hex = parseHexDigit(getChar(position));
    if (hex < 0) fail(position, "Invalid hex digit");
    codeUnit = codeUnit * 16 + hex;
    seen++;
    position++;
  }
  addCharToString(codeUnit);
  return parseStringToBuffer(position);
}
| 709 |
/**
 * Continues parsing a partial keyword ("null", "true" or "false").
 *
 * [partialState] carries the keyword type in its `KWD_TYPE_MASK` bits and
 * the number of letters already matched in the bits from `KWD_COUNT_SHIFT`
 * upwards. If the chunk ends before the keyword does, the progress is
 * stored back into the parser's partial state and [chunkEnd] is returned.
 * On completion the listener is notified and the position after the
 * keyword is returned.
 */
int parsePartialKeyword(int position, int partialState) {
  final int keywordType = partialState & KWD_TYPE_MASK;
  int matched = partialState >> KWD_COUNT_SHIFT;
  final String keyword =
      const ["null", "true", "false"][keywordType >> KWD_TYPE_SHIFT];
  assert(matched < keyword.length);
  for (;;) {
    if (position == chunkEnd) {
      // Still incomplete: remember how far we got.
      this.partialState =
          PARTIAL_KEYWORD | keywordType | (matched << KWD_COUNT_SHIFT);
      return chunkEnd;
    }
    if (getChar(position) != keyword.codeUnitAt(matched)) {
      return fail(position);
    }
    position++;
    matched++;
    if (matched >= keyword.length) break;
  }
  if (keywordType == KWD_NULL) {
    listener.handleNull();
  } else {
    listener.handleBool(keywordType == KWD_TRUE);
  }
  return position;
}
| 737 |
/**
 * Converts the hex-digit character code [char] to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Returns -1 if [char] is not a
 * hex digit.
 */
int parseHexDigit(int char) {
  int decimal = char ^ 0x30;
  if (decimal <= 9) return decimal;
  // Fold upper case onto lower case; 'a'..'f' then map to 1..6.
  int alpha = (char | 0x20) ^ 0x60;
  return (alpha > 0 && alpha <= 6) ? alpha + 9 : -1;
}
| 747 |
/**
 * Parses the current chunk as a chunk of JSON.
 *
 * Starts parsing at [position] and continues until [chunkEnd].
 * Continues parsing where the previous chunk (if any) ended: a pending
 * partial token is finished first, and the structural state is loaded
 * from and stored back to the [state] field.
 *
 * Every syntax error is reported through `return fail(position)`,
 * including the number case in the `default:` branch.
 */
void parse(int position) {
  int length = chunkEnd;
  if (partialState != NO_PARTIAL) {
    position = parsePartial(position);
    if (position == length) return;
  }
  int state = this.state;
  while (position < length) {
    int char = getChar(position);
    switch (char) {
      case SPACE:
      case CARRIAGE_RETURN:
      case NEWLINE:
      case TAB:
        position++;
        break;
      case QUOTE:
        if ((state & ALLOW_STRING_MASK) != 0) return fail(position);
        // The value bits are set before parsing the token, so the state is
        // already correct if the token is cut by the end of the chunk.
        state |= VALUE_READ_BITS;
        position = parseString(position + 1);
        break;
      case LBRACKET:
        if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
        listener.beginArray();
        saveState(state);
        state = STATE_ARRAY_EMPTY;
        position++;
        break;
      case LBRACE:
        if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
        listener.beginObject();
        saveState(state);
        state = STATE_OBJECT_EMPTY;
        position++;
        break;
      case CHAR_n:
        if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
        state |= VALUE_READ_BITS;
        position = parseNull(position);
        break;
      case CHAR_f:
        if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
        state |= VALUE_READ_BITS;
        position = parseFalse(position);
        break;
      case CHAR_t:
        if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
        state |= VALUE_READ_BITS;
        position = parseTrue(position);
        break;
      case COLON:
        if (state != STATE_OBJECT_KEY) return fail(position);
        listener.propertyName();
        state = STATE_OBJECT_COLON;
        position++;
        break;
      case COMMA:
        if (state == STATE_OBJECT_VALUE) {
          listener.propertyValue();
          state = STATE_OBJECT_COMMA;
          position++;
        } else if (state == STATE_ARRAY_VALUE) {
          listener.arrayElement();
          state = STATE_ARRAY_COMMA;
          position++;
        } else {
          return fail(position);
        }
        break;
      case RBRACKET:
        if (state == STATE_ARRAY_EMPTY) {
          listener.endArray();
        } else if (state == STATE_ARRAY_VALUE) {
          listener.arrayElement();
          listener.endArray();
        } else {
          return fail(position);
        }
        // The finished array counts as a value read in the enclosing state.
        state = restoreState() | VALUE_READ_BITS;
        position++;
        break;
      case RBRACE:
        if (state == STATE_OBJECT_EMPTY) {
          listener.endObject();
        } else if (state == STATE_OBJECT_VALUE) {
          listener.propertyValue();
          listener.endObject();
        } else {
          return fail(position);
        }
        // The finished object counts as a value read in the enclosing state.
        state = restoreState() | VALUE_READ_BITS;
        position++;
        break;
      default:
        // Anything else can only be the start of a number.
        if ((state & ALLOW_VALUE_MASK) != 0) return fail(position);
        state |= VALUE_READ_BITS;
        position = parseNumber(char, position);
        break;
    }
  }
  // Remember the structural state for the next chunk (or for close()).
  this.state = state;
}
334 | 856 |
335 /** | 857 /** |
336 * Parses a "true" literal starting at [position]. | 858 * Parses a "true" literal starting at [position]. |
337 * | 859 * |
338 * [:source[position]:] must be "t". | 860 * [:source[position]:] must be "t". |
339 */ | 861 */ |
340 int parseTrue(int position) { | 862 int parseTrue(int position) { |
341 assert(source.codeUnitAt(position) == CHAR_t); | 863 assert(getChar(position) == CHAR_t); |
342 if (source.length < position + 4) fail(position, "Unexpected identifier"); | 864 if (chunkEnd < position + 4) { |
343 if (source.codeUnitAt(position + 1) != CHAR_r || | 865 return parseKeywordPrefix(position, "true", KWD_TRUE); |
344 source.codeUnitAt(position + 2) != CHAR_u || | 866 } |
345 source.codeUnitAt(position + 3) != CHAR_e) { | 867 if (getChar(position + 1) != CHAR_r || |
346 fail(position); | 868 getChar(position + 2) != CHAR_u || |
| 869 getChar(position + 3) != CHAR_e) { |
| 870 return fail(position); |
347 } | 871 } |
348 listener.handleBool(true); | 872 listener.handleBool(true); |
349 return position + 4; | 873 return position + 4; |
350 } | 874 } |
351 | 875 |
352 /** | 876 /** |
353 * Parses a "false" literal starting at [position]. | 877 * Parses a "false" literal starting at [position]. |
354 * | 878 * |
355 * [:source[position]:] must be "f". | 879 * [:source[position]:] must be "f". |
356 */ | 880 */ |
357 int parseFalse(int position) { | 881 int parseFalse(int position) { |
358 assert(source.codeUnitAt(position) == CHAR_f); | 882 assert(getChar(position) == CHAR_f); |
359 if (source.length < position + 5) fail(position, "Unexpected identifier"); | 883 if (chunkEnd < position + 5) { |
360 if (source.codeUnitAt(position + 1) != CHAR_a || | 884 return parseKeywordPrefix(position, "false", KWD_FALSE); |
361 source.codeUnitAt(position + 2) != CHAR_l || | 885 } |
362 source.codeUnitAt(position + 3) != CHAR_s || | 886 if (getChar(position + 1) != CHAR_a || |
363 source.codeUnitAt(position + 4) != CHAR_e) { | 887 getChar(position + 2) != CHAR_l || |
364 fail(position); | 888 getChar(position + 3) != CHAR_s || |
| 889 getChar(position + 4) != CHAR_e) { |
| 890 return fail(position); |
365 } | 891 } |
366 listener.handleBool(false); | 892 listener.handleBool(false); |
367 return position + 5; | 893 return position + 5; |
368 } | 894 } |
369 | 895 |
370 /** | 896 /** |
371 * Parses a "null" literal starting at [position]. | 897 * Parses a "null" literal starting at [position]. |
372 * | 898 * |
373 * [:source[position]:] must be "n". | 899 * [:source[position]:] must be "n". |
374 */ | 900 */ |
375 int parseNull(int position) { | 901 int parseNull(int position) { |
376 assert(source.codeUnitAt(position) == CHAR_n); | 902 assert(getChar(position) == CHAR_n); |
377 if (source.length < position + 4) fail(position, "Unexpected identifier"); | 903 if (chunkEnd < position + 4) { |
378 if (source.codeUnitAt(position + 1) != CHAR_u || | 904 return parseKeywordPrefix(position, "null", KWD_NULL); |
379 source.codeUnitAt(position + 2) != CHAR_l || | 905 } |
380 source.codeUnitAt(position + 3) != CHAR_l) { | 906 if (getChar(position + 1) != CHAR_u || |
381 fail(position); | 907 getChar(position + 2) != CHAR_l || |
| 908 getChar(position + 3) != CHAR_l) { |
| 909 return fail(position); |
382 } | 910 } |
383 listener.handleNull(); | 911 listener.handleNull(); |
384 return position + 4; | 912 return position + 4; |
385 } | 913 } |
386 | 914 |
| 915 int parseKeywordPrefix(int position, String chars, int type) { |
| 916 assert(getChar(position) == chars.codeUnitAt(0)); |
| 917 int length = chunkEnd; |
| 918 int start = position; |
| 919 int count = 1; |
| 920 while (++position < length) { |
| 921 int char = getChar(position); |
| 922 if (char != chars.codeUnitAt(count)) return fail(start); |
| 923 count++; |
| 924 } |
| 925 this.partialState = PARTIAL_KEYWORD | type | (count << KWD_COUNT_SHIFT); |
| 926 return length; |
| 927 } |
| 928 |
387 /** | 929 /** |
388 * Parses a string value. | 930 * Parses a string value. |
389 * | 931 * |
390 * Initial [position] is right after the initial quote. | 932 * Initial [position] is right after the initial quote. |
391 * Returned position right after the final quote. | 933 * Returned position right after the final quote. |
392 */ | 934 */ |
393 int parseString(int position) { | 935 int parseString(int position) { |
394 // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"' | 936 // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"' |
395 // Initial position is right after first '"'. | 937 // Initial position is right after first '"'. |
396 int start = position; | 938 int start = position; |
397 while (position < source.length) { | 939 int end = chunkEnd; |
398 int char = source.codeUnitAt(position++); | 940 while (position < end) { |
| 941 int char = getChar(position++); |
399 // BACKSLASH is larger than QUOTE and SPACE. | 942 // BACKSLASH is larger than QUOTE and SPACE. |
400 if (char > BACKSLASH) { | 943 if (char > BACKSLASH) { |
401 continue; | 944 continue; |
402 } | 945 } |
403 if (char == BACKSLASH) { | 946 if (char == BACKSLASH) { |
404 return parseStringWithEscapes(start, position - 1); | 947 beginString(); |
| 948 addSliceToString(start, position - 1); |
| 949 return parseStringToBuffer(position - 1); |
405 } | 950 } |
406 if (char == QUOTE) { | 951 if (char == QUOTE) { |
407 listener.handleString(source.substring(start, position - 1)); | 952 listener.handleString(getString(start, position - 1)); |
408 return position; | 953 return position; |
409 } | 954 } |
410 if (char < SPACE) { | 955 if (char < SPACE) { |
411 fail(position - 1, "Control character in string"); | 956 fail(position - 1, "Control character in string"); |
412 } | 957 } |
413 } | 958 } |
414 fail(start - 1, "Unterminated string"); | 959 beginString(); |
415 } | 960 addSliceToString(start, end); |
416 | 961 return chunkString(STR_PLAIN); |
417 int parseStringWithEscapes(start, position) { | 962 } |
418 // Backslash escape detected. Collect character codes for rest of string. | 963 |
419 int firstEscape = position; | 964 /** |
420 List<int> chars = <int>[]; | 965 * Sets up a partial string state. |
421 for (int i = start; i < firstEscape; i++) { | 966 * |
422 chars.add(source.codeUnitAt(i)); | 967 * The state is either not inside an escape, or right after a backslash. |
423 } | 968 * For partial strings ending inside a Unicode escape, use |
424 position++; | 969 * [chunkStringEscapeU]. |
| 970 */ |
| 971 int chunkString(int stringState) { |
| 972 partialState = PARTIAL_STRING | stringState; |
| 973 return chunkEnd; |
| 974 } |
| 975 |
| 976 /** |
| 977 * Sets up a partial string state for a partially parsed Unicode escape. |
| 978 * |
| 979 * The partial string state includes the current [buffer] and the |
| 980 * number of hex digits of the Unicode seen so far (e.g., for `"\u30') |
| 981 * the state knows that two digits have been seen, and what their value is. |
| 982 * |
| 983 * Returns [chunkEnd] so it can be used as part of a return statement. |
| 984 */ |
| 985 int chunkStringEscapeU(int count, int value) { |
| 986 partialState = PARTIAL_STRING | STR_U | |
| 987 (count << STR_U_COUNT_SHIFT) | |
| 988 (value << STR_U_VALUE_SHIFT); |
| 989 return chunkEnd; |
| 990 } |
| 991 |
| 992 /** |
| 993 * Parses the remainder of a string literal into a buffer. |
| 994 * |
| 995 * The buffer is stored in [buffer] and its underlying format depends on |
| 996 * the input chunk type. For example UTF-8 decoding happens in the |
| 997 * buffer, not in the parser, since all significant JSON characters are ASCII. |
| 998 * |
| 999 * This function scans through the string literal for escapes, and copies |
| 1000 * slices of non-escape characters using [addSliceToString]. |
| 1001 */ |
| 1002 int parseStringToBuffer(position) { |
| 1003 int end = chunkEnd; |
| 1004 int start = position; |
425 while (true) { | 1005 while (true) { |
426 if (position == source.length) { | 1006 if (position == end) { |
427 fail(start - 1, "Unterminated string"); | 1007 if (position > start) { |
428 } | 1008 addSliceToString(start, position); |
429 int char = source.codeUnitAt(position); | 1009 } |
430 switch (char) { | 1010 return chunkString(STR_PLAIN); |
431 case CHAR_b: char = BACKSPACE; break; | 1011 } |
432 case CHAR_f: char = FORM_FEED; break; | 1012 int char = getChar(position++); |
433 case CHAR_n: char = NEWLINE; break; | 1013 if (char > BACKSLASH) continue; |
434 case CHAR_r: char = CARRIAGE_RETURN; break; | 1014 if (char < SPACE) { |
435 case CHAR_t: char = TAB; break; | 1015 fail(position - 1); // Control character in string. |
436 case SLASH: | 1016 return; |
437 case BACKSLASH: | 1017 } |
438 case QUOTE: | 1018 if (char == QUOTE) { |
439 break; | 1019 int quotePosition = position - 1; |
440 case CHAR_u: | 1020 if (quotePosition > start) { |
441 int hexStart = position - 1; | 1021 addSliceToString(start, quotePosition); |
442 int value = 0; | 1022 } |
443 for (int i = 0; i < 4; i++) { | 1023 listener.handleString(endString()); |
444 position++; | 1024 return position; |
445 if (position == source.length) { | 1025 } |
446 fail(start - 1, "Unterminated string"); | 1026 if (char != BACKSLASH) { |
| 1027 continue; |
| 1028 } |
| 1029 // Handle escape. |
| 1030 if (position - 1 > start) { |
| 1031 addSliceToString(start, position - 1); |
| 1032 } |
| 1033 if (position == end) return chunkString(STR_ESCAPE); |
| 1034 position = parseStringEscape(position); |
| 1035 if (position == end) return position; |
| 1036 start = position; |
| 1037 } |
| 1038 return -1; // UNREACHABLE. |
| 1039 } |
| 1040 |
| 1041 /** |
| 1042 * Parse a string escape. |
| 1043 * |
| 1044 * Position is right after the initial backslash. |
| 1045 * The following escape is parsed into a character code which is added to |
| 1046 * the current string buffer using [addCharToString]. |
| 1047 * |
| 1048 * Returns position after the last character of the escape. |
| 1049 */ |
| 1050 int parseStringEscape(int position) { |
| 1051 int char = getChar(position++); |
| 1052 int length = chunkEnd; |
| 1053 switch (char) { |
| 1054 case CHAR_b: char = BACKSPACE; break; |
| 1055 case CHAR_f: char = FORM_FEED; break; |
| 1056 case CHAR_n: char = NEWLINE; break; |
| 1057 case CHAR_r: char = CARRIAGE_RETURN; break; |
| 1058 case CHAR_t: char = TAB; break; |
| 1059 case SLASH: |
| 1060 case BACKSLASH: |
| 1061 case QUOTE: |
| 1062 break; |
| 1063 case CHAR_u: |
| 1064 int hexStart = position - 1; |
| 1065 int value = 0; |
| 1066 for (int i = 0; i < 4; i++) { |
| 1067 if (position == length) return chunkStringEscapeU(i, value); |
| 1068 char = getChar(position++); |
| 1069 int digit = char ^ 0x30; |
| 1070 value *= 16; |
| 1071 if (digit <= 9) { |
| 1072 value += digit; |
| 1073 } else { |
| 1074 digit = (char | 0x20) - CHAR_a; |
| 1075 if (digit < 0 || digit > 5) { |
| 1076 return fail(hexStart, "Invalid unicode escape"); |
447 } | 1077 } |
448 char = source.codeUnitAt(position); | 1078 value += digit + 10; |
449 char -= 0x30; | |
450 if (char < 0) fail(hexStart, "Invalid unicode escape"); | |
451 if (char < 10) { | |
452 value = value * 16 + char; | |
453 } else { | |
454 char = (char | 0x20) - 0x31; | |
455 if (char < 0 || char > 5) { | |
456 fail(hexStart, "Invalid unicode escape"); | |
457 } | |
458 value = value * 16 + char + 10; | |
459 } | |
460 } | 1079 } |
461 char = value; | |
462 break; | |
463 default: | |
464 if (char < SPACE) fail(position, "Control character in string"); | |
465 fail(position, "Unrecognized string escape"); | |
466 } | |
467 do { | |
468 chars.add(char); | |
469 position++; | |
470 if (position == source.length) fail(start - 1, "Unterminated string"); | |
471 char = source.codeUnitAt(position); | |
472 if (char == QUOTE) { | |
473 String result = new String.fromCharCodes(chars); | |
474 listener.handleString(result); | |
475 return position + 1; | |
476 } | 1080 } |
477 if (char < SPACE) { | 1081 char = value; |
478 fail(position, "Control character in string"); | 1082 break; |
479 } | 1083 default: |
480 } while (char != BACKSLASH); | 1084 if (char < SPACE) return fail(position, "Control character in string"); |
481 position++; | 1085 return fail(position, "Unrecognized string escape"); |
482 } | 1086 } |
| 1087 addCharToString(char); |
| 1088 if (position == length) return chunkString(STR_PLAIN); |
| 1089 return position; |
| 1090 } |
| 1091 |
| 1092 /// Sets up a partial numeral state. |
| 1093 /// Returns chunkEnd to allow easy one-line bailout tests. |
| 1094 int beginChunkNumber(int state, int start) { |
| 1095 int end = chunkEnd; |
| 1096 int length = end - start; |
| 1097 var buffer = new _NumberBuffer(length); |
| 1098 copyCharsToList(start, end, buffer.list, 0); |
| 1099 buffer.length = length; |
| 1100 this.buffer = buffer; |
| 1101 this.partialState = PARTIAL_NUMERAL | state; |
| 1102 return end; |
| 1103 } |
| 1104 |
| 1105 void addNumberChunk(_NumberBuffer buffer, int start, int end, int overhead) { |
| 1106 int length = end - start; |
| 1107 int count = buffer.length; |
| 1108 int newCount = count + length; |
| 1109 int newCapacity = newCount + overhead; |
| 1110 buffer.ensureCapacity(newCapacity); |
| 1111 copyCharsToList(start, end, buffer.list, count); |
| 1112 buffer.length = newCount; |
| 1113 } |
| 1114 |
| 1115 // Continues an already chunked number accross an entire chunk. |
| 1116 int continueChunkNumber(int state, int start, _NumberBuffer buffer) { |
| 1117 int end = chunkEnd; |
| 1118 addNumberChunk(buffer, start, end, _NumberBuffer.kDefaultOverhead); |
| 1119 this.buffer = buffer; |
| 1120 this.partialState = PARTIAL_NUMERAL | state; |
| 1121 return end; |
| 1122 } |
| 1123 |
| 1124 int finishChunkNumber(int state, int start, int end, _NumberBuffer buffer) { |
| 1125 if (state == NUM_ZERO) { |
| 1126 listener.handleNumber(0); |
| 1127 return; |
| 1128 } |
| 1129 if (end > start) { |
| 1130 addNumberChunk(buffer, start, end, 0); |
| 1131 } |
| 1132 if (state == NUM_DIGIT) { |
| 1133 listener.handleNumber(buffer.parseInt()); |
| 1134 } else if (state == NUM_DOT_DIGIT || state == NUM_E_DIGIT) { |
| 1135 listener.handleNumber(buffer.parseDouble()); |
| 1136 } else { |
| 1137 fail(chunkEnd, "Unterminated number literal"); |
| 1138 } |
| 1139 return end; |
483 } | 1140 } |
484 | 1141 |
  /**
   * Parses a number literal whose first character [char] is at [position].
   *
   * Reports the value to the listener and returns the position after the
   * literal. If the chunk ends inside the literal, the characters seen so
   * far are saved with [beginChunkNumber] and its result is returned.
   * Fails if [char] does not start a valid number.
   */
  int parseNumber(int char, int position) {
    // Also called on any unexpected character.
    // Format:
    //  '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)?
    int start = position;
    int length = chunkEnd;
    // Collects an int value while parsing. Used for both an integer literal,
    // and the exponent part of a double literal.
    int intValue = 0;
    double doubleValue = 0.0; // Collect double value while parsing.
    int sign = 1;
    bool isDouble = false;
    // When this block completes, position points to the character after the
    // literal, and isDouble is set if the literal contains a decimal point
    // or an exponential. Reaching the end of the chunk inside the literal
    // returns early through beginChunkNumber instead.
    parsing: {
      if (char == MINUS) {
        sign = -1;
        position++;
        if (position == length) return beginChunkNumber(NUM_SIGN, start);
        char = getChar(position);
      }
      // XOR with CHAR_0 maps '0'..'9' to 0..9; the checks below treat any
      // result above 9 as "not a digit".
      int digit = char ^ CHAR_0;
      if (digit > 9) {
        if (sign < 0) {
          fail(position, "Missing expected digit");
        } else {
          // If it doesn't even start out as a numeral.
          fail(position, "Unexpected character");
        }
      }
      if (digit == 0) {
        position++;
        if (position == length) return beginChunkNumber(NUM_ZERO, start);
        char = getChar(position);
        digit = char ^ CHAR_0;
        // If starting with zero, next character must not be digit.
        if (digit <= 9) fail(position);
      } else {
        do {
          intValue = 10 * intValue + digit;
          position++;
          if (position == length) return beginChunkNumber(NUM_DIGIT, start);
          char = getChar(position);
          digit = char ^ CHAR_0;
        } while (digit <= 9);
      }
      if (char == DECIMALPOINT) {
        isDouble = true;
        // From here on doubleValue holds the digits seen so far and intValue
        // holds the decimal exponent to apply to them.
        doubleValue = intValue.toDouble();
        intValue = 0;
        position++;
        if (position == length) return beginChunkNumber(NUM_DOT, start);
        char = getChar(position);
        digit = char ^ CHAR_0;
        if (digit > 9) fail(position);
        do {
          doubleValue = 10.0 * doubleValue + digit;
          // Each fractional digit lowers the decimal exponent by one.
          intValue -= 1;
          position++;
          if (position == length) return beginChunkNumber(NUM_DOT_DIGIT, start);
          char = getChar(position);
          digit = char ^ CHAR_0;
        } while (digit <= 9);
      }
      // Setting bit 0x20 folds 'E' onto 'e'.
      if ((char | 0x20) == CHAR_e) {
        if (!isDouble) {
          doubleValue = intValue.toDouble();
          intValue = 0;
          isDouble = true;
        }
        position++;
        if (position == length) return beginChunkNumber(NUM_E, start);
        char = getChar(position);
        int expSign = 1;
        int exponent = 0;
        if (char == PLUS || char == MINUS) {
          expSign = 0x2C - char;  // -1 for MINUS, +1 for PLUS
          position++;
          if (position == length) return beginChunkNumber(NUM_E_SIGN, start);
          char = getChar(position);
        }
        digit = char ^ CHAR_0;
        if (digit > 9) {
          fail(position, "Missing expected digit");
        }
        do {
          exponent = 10 * exponent + digit;
          position++;
          if (position == length) return beginChunkNumber(NUM_E_DIGIT, start);
          char = getChar(position);
          digit = char ^ CHAR_0;
        } while (digit <= 9);
        // Combine the explicit exponent with the one implied by the
        // fractional digits.
        intValue += expSign * exponent;
      }
    }
    if (!isDouble) {
      listener.handleNumber(sign * intValue);
      return position;
    }
    // Double values at or above this value (2**53) may have lost precision.
    // Only trust results that are below this value.
    const double maxExactDouble = 9007199254740992.0;
    if (doubleValue < maxExactDouble) {
      int exponent = intValue;
      // Multiplying by the sign also yields -0.0 for a negative zero.
      double signedMantissa = doubleValue * sign;
      if (exponent >= -22) {
        if (exponent < 0) {
          listener.handleNumber(signedMantissa / POWERS_OF_TEN[-exponent]);
          return position;
        }
        if (exponent == 0) {
          listener.handleNumber(signedMantissa);
          return position;
        }
        if (exponent <= 22) {
          listener.handleNumber(signedMantissa * POWERS_OF_TEN[exponent]);
          return position;
        }
      }
    }
    // If the value is outside the range +/-maxExactDouble or
    // exponent is outside the range +/-22, then we can't trust simple double
    // arithmetic to get the exact result, so we use the system double parsing.
    listener.handleNumber(parseDouble(start, position));
    return position;
  }
567 | 1269 |
568 static double _parseDouble(String source, int start, int end) | 1270 int fail(int position, [String message]) { |
569 native "Double_parse"; | 1271 if (message == null) { |
570 | 1272 message = "Unexpected character"; |
571 void fail(int position, [String message]) { | 1273 if (position == chunkEnd) message = "Unexpected end of input"; |
572 if (message == null) message = "Unexpected character"; | 1274 } |
573 throw new FormatException(message, source, position); | 1275 throw new FormatException(message, chunk, position); |
574 } | 1276 } |
575 } | 1277 } |
| 1278 |
/**
 * Chunked JSON parser whose input chunks are [String]s.
 *
 * Supplies the character access, string building and number copying
 * operations of [_ChunkedJsonParser] on top of the current [chunk].
 */
class _JsonStringParser extends _ChunkedJsonParser {
  /// The chunk currently being parsed.
  String chunk;

  /// The position in [chunk] where parsing of the chunk stops.
  int chunkEnd;

  _JsonStringParser(_JsonListener listener) : super(listener);

  /// The UTF-16 code unit of [chunk] at [position].
  int getChar(int position) {
    return chunk.codeUnitAt(position);
  }

  /// The part of [chunk] from [start] to [end].
  String getString(int start, int end) => chunk.substring(start, end);

  /// Starts collecting a string in a new [StringBuffer].
  void beginString() {
    this.buffer = new StringBuffer();
  }

  /// Appends the part of [chunk] from [start] to [end] to the string buffer.
  void addSliceToString(int start, int end) {
    StringBuffer target = this.buffer;
    target.write(chunk.substring(start, end));
  }

  /// Appends the single character [charCode] to the string buffer.
  void addCharToString(int charCode) {
    StringBuffer target = this.buffer;
    target.writeCharCode(charCode);
  }

  /// Returns the collected string and clears the buffer reference.
  String endString() {
    StringBuffer finished = this.buffer;
    this.buffer = null;
    return finished.toString();
  }

  /// Copies the code units of [chunk] from [start] to [end] into [target],
  /// beginning at index [offset].
  void copyCharsToList(int start, int end, List target, int offset) {
    int to = offset;
    for (int from = start; from < end; from++) {
      target[to++] = chunk.codeUnitAt(from);
    }
  }

  /// Parses the part of [chunk] from [start] to [end] as a double.
  double parseDouble(int start, int end) => _parseDouble(chunk, start, end);
}
| 1325 |
patch class JsonDecoder {
  /// Starts a chunked conversion that decodes string chunks with a
  /// [_JsonStringDecoderSink] using this decoder's reviver, and delivers
  /// to [sink].
  /* patch */ StringConversionSink startChunkedConversion(Sink<Object> sink) =>
      new _JsonStringDecoderSink(_reviver, sink);
}
| 1331 |
/**
 * Sink that decodes chunked JSON string input into a single object.
 *
 * Chunks are fed to a [_ChunkedJsonParser]; when the sink is closed, the
 * parser's result is added to the output sink, which is then closed.
 */
class _JsonStringDecoderSink extends StringConversionSinkBase {
  _ChunkedJsonParser _parser;
  Function _reviver;
  final Sink<Object> _sink;

  _JsonStringDecoderSink(reviver, this._sink)
      : _reviver = reviver, _parser = _createParser(reviver);

  /// Creates a string parser whose listener applies [reviver] when one
  /// is given.
  static _ChunkedJsonParser _createParser(reviver) {
    _BuildJsonListener listener = (reviver == null)
        ? new _BuildJsonListener()
        : new _ReviverJsonListener(reviver);
    return new _JsonStringParser(listener);
  }

  /// Parses [chunk] from [start] to [end].
  ///
  /// NOTE(review): when [isLast] is true only the parser is closed here;
  /// the decoded value is added to the output sink by [close] - confirm
  /// that callers always call [close] as well.
  void addSlice(String chunk, int start, int end, bool isLast) {
    var parser = _parser;
    parser.chunk = chunk;
    parser.chunkEnd = end;
    parser.parse(start);
    if (isLast) parser.close();
  }

  /// Parses all of [chunk] as a non-final slice.
  void add(String chunk) {
    addSlice(chunk, 0, chunk.length, false);
  }

  /// Closes the parser, then emits its result and closes the output sink.
  void close() {
    _parser.close();
    _sink.add(_parser.result);
    _sink.close();
  }

  /// Drops this sink's parser and returns a UTF-8 decoding sink with the
  /// same reviver and output sink.
  Utf8ConversionSink asUtf8Sink(bool allowMalformed) {
    _parser = null;
    return new _JsonUtf8DecoderSink(_reviver, _sink, allowMalformed);
  }
}
| 1379 |
| 1380 class _Utf8StringBuffer { |
| 1381 static const int INITIAL_CAPACITY = 32; |
| 1382 // Partial state encoding. |
| 1383 static const int MASK_TWO_BIT = 0x03; |
| 1384 static const int MASK_SIZE = MASK_TWO_BIT; |
| 1385 static const int SHIFT_MISSING = 2; |
| 1386 static const int SHIFT_VALUE = 4; |
| 1387 static const int NO_PARTIAL = 0; |
| 1388 |
| 1389 // UTF-8 encoding and limits. |
| 1390 static const int MAX_ASCII = 127; |
| 1391 static const int MAX_TWO_BYTE = 0x7ff; |
| 1392 static const int MAX_THREE_BYTE = 0xffff; |
| 1393 static const int MAX_UNICODE = 0X10ffff; |
| 1394 static const int MASK_TWO_BYTE = 0x1f; |
| 1395 static const int MASK_THREE_BYTE = 0x0f; |
| 1396 static const int MASK_FOUR_BYTE = 0x07; |
| 1397 static const int MASK_CONTINUE_TAG = 0xC0; |
| 1398 static const int MASK_CONTINUE_VALUE = 0x3f; |
| 1399 static const int CONTINUE_TAG = 0x80; |
| 1400 |
| 1401 // UTF-16 surrogate encoding. |
| 1402 static const int LEAD_SURROGATE = 0xD800; |
| 1403 static const int TAIL_SURROGATE = 0xDC00; |
| 1404 static const int SHIFT_HIGH_SURROGATE = 10; |
| 1405 static const int MASK_LOW_SURROGATE = 0x3ff; |
| 1406 |
| 1407 // The internal buffer starts as Uint8List, but may change to Uint16List |
| 1408 // if the string contains non-Latin-1 characters. |
| 1409 List<int> buffer = new Uint8List(INITIAL_CAPACITY); |
| 1410 // Number of elements in buffer. |
| 1411 int length = 0; |
| 1412 // Partial decoding state, for cases where an UTF-8 sequences is split |
| 1413 // between chunks. |
| 1414 int partialState = NO_PARTIAL; |
| 1415 // Whether all characters so far have been Latin-1 (and the buffer is |
| 1416 // still a Uint8List). Set to false when the first non-Latin-1 character |
| 1417 // is encountered, and the buffer is then also converted to a Uint16List. |
| 1418 bool isLatin1 = true; |
| 1419 // If allowing malformed, invalid UTF-8 sequences are converted to |
| 1420 // U+FFFD. |
| 1421 bool allowMalformed; |
| 1422 |
| 1423 _Utf8StringBuffer(this.allowMalformed); |
| 1424 |
| 1425 /** |
| 1426 * Parse the continuation of a multi-byte UTF-8 sequence. |
| 1427 * |
| 1428 * Parse [utf8] from [position] to [end]. If the sequence extends beyond |
| 1429 * `end`, store the partial state in [partialState], and continue from there |
| 1430 * on the next added slice. |
| 1431 * |
| 1432 * The [size] is the number of expected continuation bytes total, |
| 1433 * and [missing] is the number of remaining continuation bytes. |
| 1434 * The [size] is used to detect overlong encodings. |
| 1435 * The [value] is the value collected so far. |
| 1436 * |
| 1437 * When called after seeing the first multi-byte marker, the [size] and |
| 1438 * [missing] values are always the same, but they may differ if continuing |
| 1439 * after a partial sequence. |
| 1440 */ |
| 1441 int addContinuation(List<int> utf8, int position, int end, |
| 1442 int size, int missing, int value) { |
| 1443 int codeEnd = position + missing; |
| 1444 do { |
| 1445 if (position == end) { |
| 1446 missing = codeEnd - position; |
| 1447 partialState = |
| 1448 size | (missing << SHIFT_MISSING) | (value << SHIFT_VALUE); |
| 1449 return end; |
| 1450 } |
| 1451 int char = utf8[position]; |
| 1452 if ((char & MASK_CONTINUE_TAG) != CONTINUE_TAG) { |
| 1453 if (allowMalformed) { |
| 1454 addCharCode(0xFFFD); |
| 1455 return position; |
| 1456 } |
| 1457 throw new FormatException("Expected UTF-8 continuation byte, " |
| 1458 "found $char", utf8, position); |
| 1459 } |
| 1460 value = 64 * value + (char & MASK_CONTINUE_VALUE); |
| 1461 position++; |
| 1462 } while (position < codeEnd); |
| 1463 if (value <= const [0, MAX_ASCII, MAX_TWO_BYTE, MAX_THREE_BYTE][size]) { |
| 1464 // Over-long encoding. |
| 1465 if (allowMalformed) { |
| 1466 value = 0xFFFD; |
| 1467 } else { |
| 1468 throw new FormatException( |
| 1469 "Invalid encoding: U+${value.toRadixString(16).padLeft(4, '0')}" |
| 1470 " encoded in ${size + 1} bytes.", utf8, position - 1); |
| 1471 } |
| 1472 } |
| 1473 addCharCode(value); |
| 1474 return position; |
| 1475 } |
| 1476 |
| 1477 void addCharCode(int char) { |
| 1478 assert(char >= 0); |
| 1479 assert(char <= MAX_UNICODE); |
| 1480 if (partialState != NO_PARTIAL) { |
| 1481 if (allowMalformed) { |
| 1482 partialState = NO_PARTIAL; |
| 1483 addCharCode(0xFFFD); |
| 1484 } else { |
| 1485 throw new FormatException("Incomplete UTF-8 sequence", utf8); |
| 1486 } |
| 1487 } |
| 1488 if (isLatin1 && char > 0xff) { |
| 1489 _to16Bit(); // Also grows a little if close to full. |
| 1490 } |
| 1491 int length = this.length; |
| 1492 if (char <= MAX_THREE_BYTE) { |
| 1493 if (length == buffer.length) _grow(); |
| 1494 buffer[length] = char; |
| 1495 this.length = length + 1; |
| 1496 return; |
| 1497 } |
| 1498 if (length + 2 > buffer.length) _grow(); |
| 1499 int bits = char - 0x10000; |
| 1500 buffer[length] = LEAD_SURROGATE | (bits >> SHIFT_HIGH_SURROGATE); |
| 1501 buffer[length + 1] = TAIL_SURROGATE | (bits & MASK_LOW_SURROGATE); |
| 1502 this.length = length + 2; |
| 1503 } |
| 1504 |
| 1505 void _to16Bit() { |
| 1506 assert(isLatin1); |
| 1507 Uint16List newBuffer; |
| 1508 if ((length + INITIAL_CAPACITY) * 2 <= buffer.length) { |
| 1509 // Reuse existing buffer if it's big enough. |
| 1510 newBuffer = new Uint16List.view(buffer.buffer); |
| 1511 } else { |
| 1512 int newCapacity = buffer.length; |
| 1513 if (newCapacity - length < INITIAL_CAPACITY) { |
| 1514 newCapacity = length + INITIAL_CAPACITY; |
| 1515 } |
| 1516 newBuffer = new Uint16List(newCapacity); |
| 1517 } |
| 1518 newBuffer.setRange(0, length, buffer); |
| 1519 buffer = newBuffer; |
| 1520 isLatin1 = false; |
| 1521 } |
| 1522 |
/**
 * Doubles the capacity of the backing store, preserving the first
 * [length] elements and the current element width (8-bit while
 * [isLatin1] is set, 16-bit otherwise).
 */
void _grow() {
  int doubled = buffer.length * 2;
  List replacement;
  if (!isLatin1) {
    replacement = new Uint16List(doubled);
  } else {
    replacement = new Uint8List(doubled);
  }
  replacement.setRange(0, length, buffer);
  buffer = replacement;
}
| 1534 |
/**
 * Decodes the UTF-8 bytes of [utf8] from [position] (inclusive) to [end]
 * (exclusive) and appends the decoded characters to this buffer.
 *
 * If [partialState] records an unfinished multi-byte sequence, that
 * sequence is resumed with the new bytes before anything else is decoded.
 *
 * When [allowMalformed] is false, a [FormatException] is thrown for a
 * continuation byte with no lead byte and for any byte of 0xF8 or above.
 * When it is true, each such byte is replaced by U+FFFD.
 */
void addSlice(List<int> utf8, int position, int end) {
  assert(position < end);
  if (partialState > 0) {
    // Unpack the pending sequence: total continuation bytes, how many of
    // them are still missing, and the value bits collected so far.
    int continueByteCount = (partialState & MASK_TWO_BIT);
    int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
    int value = partialState >> SHIFT_VALUE;
    partialState = NO_PARTIAL;
    position = addContinuation(utf8, position, end,
                               continueByteCount, missing, value);
    if (position == end) return;
  }
  // Keep index and capacity in local variables while looping over
  // ASCII characters.
  int index = length;
  int capacity = buffer.length;
  while (position < end) {
    int char = utf8[position];
    if (char <= MAX_ASCII) {
      if (index == capacity) {
        // Publish the local index so _grow copies the right amount.
        length = index;
        _grow();
        capacity = buffer.length;
      }
      buffer[index++] = char;
      position++;
      continue;
    }
    // Non-ASCII: the helpers below work on the fields, so write the local
    // index back before calling them.
    length = index;
    if ((char & MASK_CONTINUE_TAG) == CONTINUE_TAG) {
      // Continuation byte without a preceding lead byte.
      if (allowMalformed) {
        addCharCode(0xFFFD);
        position++;
      } else {
        throw new FormatException("Unexpected UTF-8 continuation byte",
                                  utf8, position);
      }
    } else if (char < 0xE0) {  // C0-DF
      // Two-byte.
      position = addContinuation(utf8, position + 1, end, 1, 1,
                                 char & MASK_TWO_BYTE);
    } else if (char < 0xF0) {  // E0-EF
      // Three-byte.
      position = addContinuation(utf8, position + 1, end, 2, 2,
                                 char & MASK_THREE_BYTE);
    } else if (char < 0xF8) {  // F0-F7
      // Four-byte.
      position = addContinuation(utf8, position + 1, end, 3, 3,
                                 char & MASK_FOUR_BYTE);
    } else {
      // F8-FF is not handled as a lead byte by this decoder.
      if (allowMalformed) {
        addCharCode(0xFFFD);
        position++;
      } else {
        throw new FormatException("Invalid UTF-8 byte: $char",
                                  utf8, position);
      }
    }
    // The helpers may have appended characters or replaced the buffer;
    // reload the cached values.
    index = length;
    capacity = buffer.length;
  }
  length = index;
}
| 1597 |
/**
 * Returns the buffered characters as a [String].
 *
 * If a multi-byte sequence is still incomplete, it is replaced by U+FFFD
 * when [allowMalformed] is set. Otherwise a [FormatException] is thrown
 * whose source is a byte list rebuilt from the bytes seen so far.
 */
String toString() {
  if (partialState == NO_PARTIAL) {
    return new String.fromCharCodes(buffer, 0, length);
  }
  if (allowMalformed) {
    partialState = NO_PARTIAL;
    addCharCode(0xFFFD);
    return new String.fromCharCodes(buffer, 0, length);
  }
  // Unpack the pending sequence and re-encode the bytes already consumed
  // so that the exception can carry them.
  int continueByteCount = (partialState & MASK_TWO_BIT);
  int missing = (partialState >> SHIFT_MISSING) & MASK_TWO_BIT;
  int value = partialState >> SHIFT_VALUE;
  int seenByteCount = continueByteCount - missing + 1;
  List source = new Uint8List(seenByteCount);
  // Fill in the continuation bytes from last to first, six bits each.
  for (int i = seenByteCount - 1; i >= 1; i--) {
    source[i] = CONTINUE_TAG | (value & MASK_CONTINUE_VALUE);
    value >>= 6;
  }
  // Lead byte: remaining value bits plus the length tag. Storing into a
  // Uint8List keeps only the low eight bits of the shifted 0x3c0 pattern.
  source[0] = value | (0x3c0 >> (continueByteCount - 1));
  throw new FormatException("Incomplete UTF-8 sequence",
                            source, source.length);
}
| 1621 } |
| 1622 |
/**
 * Chunked JSON parser that parses UTF-8 chunks.
 *
 * The current input is the byte list [chunk]; [chunkEnd] is set by the
 * caller together with it. String content is decoded through a
 * [_Utf8StringBuffer] created with the parser's [allowMalformed] setting.
 */
class _JsonUtf8Parser extends _ChunkedJsonParser {
  /// Passed on to every [_Utf8StringBuffer] this parser creates.
  final bool allowMalformed;

  /// The UTF-8 bytes currently being parsed.
  List<int> chunk;

  /// End position assigned by the caller alongside [chunk].
  int chunkEnd;

  _JsonUtf8Parser(_JsonListener listener, this.allowMalformed)
      : super(listener);

  /// Returns the byte at [position] in the current chunk.
  int getChar(int position) => chunk[position];

  /// Decodes the bytes from [start] to [end] of the current chunk into a
  /// string, using a fresh string buffer.
  String getString(int start, int end) {
    beginString();
    addSliceToString(start, end);
    String result = endString();
    return result;
  }

  /// Starts collecting a new string in a fresh [_Utf8StringBuffer].
  void beginString() {
    this.buffer = new _Utf8StringBuffer(allowMalformed);
  }

  /// Appends the bytes from [start] to [end] of the current chunk to the
  /// string being collected.
  void addSliceToString(int start, int end) {
    _Utf8StringBuffer buffer = this.buffer;
    buffer.addSlice(chunk, start, end);
  }

  /// Appends a single character code to the string being collected.
  void addCharToString(int charCode) {
    _Utf8StringBuffer buffer = this.buffer;
    buffer.addCharCode(charCode);
  }

  /// Finishes the string being collected, releases the buffer and returns
  /// the collected text.
  String endString() {
    _Utf8StringBuffer buffer = this.buffer;
    this.buffer = null;
    return buffer.toString();
  }

  /// Copies the bytes from [start] to [end] of the current chunk into
  /// [target], beginning at [offset].
  void copyCharsToList(int start, int end, List target, int offset) {
    int length = end - start;
    target.setRange(offset, offset + length, chunk, start);
  }

  /// Parses the bytes from [start] to [end] of the current chunk as a
  /// double by first decoding them to a string.
  double parseDouble(int start, int end) {
    String string = getString(start, end);
    return _parseDouble(string, 0, string.length);
  }
}

// Natively implemented ("Double_parse"); called above with the full range
// of an already extracted string.
double _parseDouble(String source, int start, int end)
    native "Double_parse";
| 1676 |
/**
 * Implements the chunked conversion from a UTF-8 encoding of JSON
 * to its corresponding object.
 *
 * Incoming byte chunks are handed to a [_JsonUtf8Parser]. When the sink is
 * closed, the parser's result is added to the output sink, which is then
 * closed as well.
 */
class _JsonUtf8DecoderSink extends ByteConversionSinkBase {
  // Typed as the concrete parser: `chunk` and `chunkEnd`, assigned in
  // [_addChunk], are declared on [_JsonUtf8Parser].
  _JsonUtf8Parser _parser;

  /// Receives the single decoded object on [close].
  final Sink<Object> _sink;

  _JsonUtf8DecoderSink(reviver, this._sink, bool allowMalformed)
      : _parser = _createParser(reviver, allowMalformed);

  /// Builds the parser, using a reviving listener only when [reviver] is
  /// provided.
  static _JsonUtf8Parser _createParser(reviver, bool allowMalformed) {
    _BuildJsonListener listener;
    if (reviver == null) {
      listener = new _BuildJsonListener();
    } else {
      listener = new _ReviverJsonListener(reviver);
    }
    return new _JsonUtf8Parser(listener, allowMalformed);
  }

  /// Parses `chunk[start..end)` and closes the sink if [isLast] is true.
  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    _addChunk(chunk, start, end);
    if (isLast) close();
  }

  /// Parses all of [chunk].
  void add(List<int> chunk) {
    _addChunk(chunk, 0, chunk.length);
  }

  /// Installs [chunk] as the parser's current input and parses it from
  /// [start] up to [end].
  void _addChunk(List<int> chunk, int start, int end) {
    _parser.chunk = chunk;
    _parser.chunkEnd = end;
    _parser.parse(start);
  }

  /// Finishes parsing, forwards the decoded value to the output sink and
  /// closes it.
  void close() {
    _parser.close();
    var decoded = _parser.result;
    _sink.add(decoded);
    _sink.close();
  }
}
OLD | NEW |