OLD | NEW |
(Empty) | |
| 1 #region Copyright notice and license |
| 2 // Protocol Buffers - Google's data interchange format |
| 3 // Copyright 2008 Google Inc. All rights reserved. |
| 4 // https://developers.google.com/protocol-buffers/ |
| 5 // |
| 6 // Redistribution and use in source and binary forms, with or without |
| 7 // modification, are permitted provided that the following conditions are |
| 8 // met: |
| 9 // |
| 10 // * Redistributions of source code must retain the above copyright |
| 11 // notice, this list of conditions and the following disclaimer. |
| 12 // * Redistributions in binary form must reproduce the above |
| 13 // copyright notice, this list of conditions and the following disclaimer |
| 14 // in the documentation and/or other materials provided with the |
| 15 // distribution. |
| 16 // * Neither the name of Google Inc. nor the names of its |
| 17 // contributors may be used to endorse or promote products derived from |
| 18 // this software without specific prior written permission. |
| 19 // |
| 20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 #endregion |
| 32 using System; |
| 33 using System.Collections.Generic; |
| 34 using System.Globalization; |
| 35 using System.IO; |
| 36 using System.Text; |
| 37 |
| 38 namespace Google.Protobuf |
| 39 { |
| 40 /// <summary> |
| 41 /// Simple but strict JSON tokenizer, rigidly following RFC 7159. |
| 42 /// </summary> |
| 43 /// <remarks> |
| 44 /// <para> |
| 45 /// This tokenizer is stateful, and only returns "useful" tokens - names, va
lues etc. |
| 46 /// It does not create tokens for the separator between names and values, or
for the comma |
| 47 /// between values. It validates the token stream as it goes - so callers ca
n assume that the |
| 48 /// tokens it produces are appropriate. For example, it would never produce
"start object, end array." |
| 49 /// </para> |
| 50 /// <para>Implementation details: the base class handles single token push-back and
object depth tracking, and asks the subclass for new tokens via NextImpl.</para> |
| 51 /// <para>Not thread-safe.</para> |
| 52 /// </remarks> |
| 53 internal abstract class JsonTokenizer |
| 54 { |
| 55 private JsonToken bufferedToken; |
| 56 |
/// <summary>
/// Factory for a tokenizer that parses JSON text pulled from <paramref name="reader"/>.
/// </summary>
/// <param name="reader">The source of JSON text.</param>
/// <returns>A new text-based tokenizer wrapping <paramref name="reader"/>.</returns>
internal static JsonTokenizer FromTextReader(TextReader reader)
{
    JsonTokenizer textTokenizer = new JsonTextTokenizer(reader);
    return textTokenizer;
}
| 64 |
/// <summary>
/// Factory for a tokenizer that hands out <paramref name="tokens"/> in order and, once they
/// are used up, reads from <paramref name="continuation"/> instead.
/// </summary>
/// <remarks>
/// Pushing a token back on the returned tokenizer does not push it back on the continuation
/// tokenizer, and vice versa, so use this with care. It was created for the sake of Any parsing.
/// </remarks>
/// <param name="tokens">The tokens to replay first.</param>
/// <param name="continuation">The tokenizer consulted after the replayed tokens.</param>
internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
{
    JsonTokenizer replayTokenizer = new JsonReplayTokenizer(tokens, continuation);
    return replayTokenizer;
}
| 75 |
/// <summary>
/// The number of objects currently open, i.e. how many '{' tokens have been returned
/// without a matching '}' yet. Arrays do not contribute to this count.
/// </summary>
/// <remarks>
/// Updated by <see cref="Next"/> as tokens are returned, and reversed by
/// <see cref="PushBack"/> when a start/end object token is pushed back.
/// </remarks>
internal int ObjectDepth { get; private set; }
| 81 |
// TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
// token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
/// <summary>
/// Buffers <paramref name="token"/> so that the next call to <see cref="Next"/> returns it.
/// Only one token can be buffered at a time.
/// </summary>
/// <param name="token">The token to return from the next call to <see cref="Next"/>.</param>
/// <exception cref="InvalidOperationException">A token is already buffered.</exception>
internal void PushBack(JsonToken token)
{
    if (bufferedToken != null)
    {
        throw new InvalidOperationException("Can't push back twice");
    }
    bufferedToken = token;

    // Undo the depth adjustment Next() made when it originally returned this kind of token;
    // Next() will apply it again when the buffered token is handed back out.
    switch (token.Type)
    {
        case JsonToken.TokenType.StartObject:
            ObjectDepth--;
            break;
        case JsonToken.TokenType.EndObject:
            ObjectDepth++;
            break;
    }
}
| 100 |
/// <summary>
/// Produces the next JSON token. Once an EndDocument token has been returned the stream is
/// finished, and <c>Next()</c> should not be called again.
/// </summary>
/// <remarks>
/// A token previously handed to <see cref="PushBack"/> is returned first; otherwise the token
/// comes from <see cref="NextImpl"/>. <see cref="ObjectDepth"/> is adjusted for start/end object tokens.
/// </remarks>
/// <returns>The next token in the stream. This is never null.</returns>
/// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
/// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
internal JsonToken Next()
{
    JsonToken result = bufferedToken;
    if (result == null)
    {
        result = NextImpl();
    }
    else
    {
        // Consume the single buffered token.
        bufferedToken = null;
    }

    switch (result.Type)
    {
        case JsonToken.TokenType.StartObject:
            ObjectDepth++;
            break;
        case JsonToken.TokenType.EndObject:
            ObjectDepth--;
            break;
    }
    return result;
}
| 131 |
/// <summary>
/// Supplies the next JSON token on behalf of the base class. <see cref="Next"/> calls this
/// only when it has no pushed-back token to return.
/// </summary>
/// <returns>The next token produced by the concrete tokenizer.</returns>
/// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
/// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
protected abstract JsonToken NextImpl();
| 139 |
/// <summary>
/// Tokenizer that returns the tokens of a fixed list in order and, after the last one,
/// forwards every request to another tokenizer.
/// </summary>
private class JsonReplayTokenizer : JsonTokenizer
{
    private readonly IList<JsonToken> tokens;
    private readonly JsonTokenizer nextTokenizer;

    // Index into tokens of the next token to replay.
    private int nextTokenIndex;

    internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
    {
        this.nextTokenizer = nextTokenizer;
        this.tokens = tokens;
    }

    // FIXME: Object depth not maintained...
    // NOTE(review): presumably because this tokenizer's ObjectDepth is tracked independently
    // of nextTokenizer's - confirm before relying on ObjectDepth here.
    protected override JsonToken NextImpl()
    {
        bool replaying = nextTokenIndex < tokens.Count;
        if (replaying)
        {
            return tokens[nextTokenIndex++];
        }
        return nextTokenizer.Next();
    }
}
| 165 |
| 166 /// <summary> |
| 167 /// Tokenizer which does all the *real* work of parsing JSON. |
| 168 /// </summary> |
| 169 private sealed class JsonTextTokenizer : JsonTokenizer |
| 170 { |
| 171 // The set of states in which a value is a valid next token. |
| 172 private static readonly State ValueStates = State.ArrayStart | State
.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument; |
| 173 |
| 174 private readonly Stack<ContainerType> containerStack = new Stack<Con
tainerType>(); |
| 175 private readonly PushBackReader reader; |
| 176 private State state; |
| 177 |
/// <summary>
/// Creates a tokenizer positioned at the start of a document read from <paramref name="reader"/>.
/// </summary>
internal JsonTextTokenizer(TextReader reader)
{
    // The document itself is the outermost container, so the stack is never empty
    // when PopContainer peeks at the parent.
    containerStack.Push(ContainerType.Document);
    state = State.StartOfDocument;
    this.reader = new PushBackReader(reader);
}
| 184 |
/// <summary>
/// Reads characters from the underlying reader until a complete token has been recognised,
/// and returns that token.
/// </summary>
/// <remarks>
/// This method essentially just loops through characters skipping whitespace, validating and
/// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
/// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
/// it returns the token. Although the method is large, it would be relatively hard to break down further... most
/// of it is the large switch statement, which sometimes returns and sometimes doesn't.
/// </remarks>
/// <returns>The next token; EndDocument when the reader is exhausted in a valid state.</returns>
/// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
/// <exception cref="InvalidJsonException">The input text is not valid at the current position</exception>
protected override JsonToken NextImpl()
{
    if (state == State.ReaderExhausted)
    {
        throw new InvalidOperationException("Next() called after end of document");
    }
    while (true)
    {
        var next = reader.Read();
        if (next == null)
        {
            // End of input is only acceptable once the top-level value is complete.
            ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
            state = State.ReaderExhausted;
            return JsonToken.EndDocument;
        }
        switch (next.Value)
        {
            // Skip whitespace between tokens
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                break;
            case ':':
                // Separators produce no token: validate, move on, and keep looping.
                ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
                state = State.ObjectAfterColon;
                break;
            case ',':
                ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
                state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma;
                break;
            case '"':
                // The string is read before the state is inspected, so a malformed string
                // is reported in preference to a misplaced one.
                string stringValue = ReadString();
                if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0)
                {
                    // In these states a string can only be a property name.
                    state = State.ObjectBeforeColon;
                    return JsonToken.Name(stringValue);
                }
                else
                {
                    ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
                    return JsonToken.Value(stringValue);
                }
            case '{':
                ValidateState(ValueStates, "Invalid state to read an open brace: ");
                state = State.ObjectStart;
                containerStack.Push(ContainerType.Object);
                return JsonToken.StartObject;
            case '}':
                ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                PopContainer();
                return JsonToken.EndObject;
            case '[':
                ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
                state = State.ArrayStart;
                containerStack.Push(ContainerType.Array);
                return JsonToken.StartArray;
            case ']':
                ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
                PopContainer();
                return JsonToken.EndArray;
            case 'n': // Start of null
                ConsumeLiteral("null");
                ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
                return JsonToken.Null;
            case 't': // Start of true
                ConsumeLiteral("true");
                ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
                return JsonToken.True;
            case 'f': // Start of false
                ConsumeLiteral("false");
                ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
                return JsonToken.False;
            case '-': // Start of a number
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                double number = ReadNumber(next.Value);
                ValidateAndModifyStateForValue("Invalid state to read a number token: ");
                return JsonToken.Value(number);
            default:
                // Goes through the reader's exception factory like every other parse error,
                // so that this path benefits if the factory gains location information.
                throw reader.CreateException("Invalid first character of token: " + next.Value);
        }
    }
}
| 284 |
/// <summary>
/// Throws if the current state is not one of the states in <paramref name="validStates"/>.
/// </summary>
/// <param name="validStates">Bitwise combination of the acceptable states.</param>
/// <param name="errorPrefix">Message prefix; the current state is appended to it.</param>
private void ValidateState(State validStates, string errorPrefix)
{
    bool permitted = (state & validStates) != 0;
    if (permitted)
    {
        return;
    }
    throw reader.CreateException(errorPrefix + state);
}
| 292 |
/// <summary>
/// Reads the remainder of a string token, up to and including the closing double quote.
/// The opening double quote must already have been consumed.
/// </summary>
/// <returns>The unescaped content of the string.</returns>
private string ReadString()
{
    StringBuilder text = new StringBuilder();
    // True when the previously appended code unit was a high surrogate still awaiting its partner.
    bool pendingHighSurrogate = false;
    for (;;)
    {
        char current = reader.ReadOrFail("Unexpected end of text while reading string");
        if (current < ' ')
        {
            throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) current));
        }
        switch (current)
        {
            case '"':
                // Closing quote: a dangling high surrogate makes the string invalid.
                if (pendingHighSurrogate)
                {
                    throw reader.CreateException("Invalid use of surrogate pair code units");
                }
                return text.ToString();
            case '\\':
                // Replace the backslash with whatever the escape sequence denotes. The result is
                // not re-examined for quotes, so an escaped quote does not terminate the string.
                current = ReadEscapedCharacter();
                break;
        }
        // TODO: Consider only allowing surrogate pairs that are either both escaped,
        // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
        // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
        // A low surrogate is required exactly when a high surrogate precedes it.
        bool isLowSurrogate = char.IsLowSurrogate(current);
        if (isLowSurrogate != pendingHighSurrogate)
        {
            throw reader.CreateException("Invalid use of surrogate pair code units");
        }
        pendingHighSurrogate = char.IsHighSurrogate(current);
        text.Append(current);
    }
}
| 330 |
/// <summary>
/// Reads the part of an escape sequence that follows the backslash (which must already have
/// been consumed) and returns the character it denotes.
/// </summary>
private char ReadEscapedCharacter()
{
    // Position i of EscapeCodes is the character after the backslash;
    // position i of EscapedValues is the character it stands for.
    const string EscapeCodes = "n\\bfrt\"/";
    const string EscapedValues = "\n\\\b\f\r\t\"/";

    char escapeCode = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
    if (escapeCode == 'u')
    {
        return ReadUnicodeEscape();
    }
    int position = EscapeCodes.IndexOf(escapeCode);
    if (position < 0)
    {
        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) escapeCode));
    }
    return EscapedValues[position];
}
| 361 |
/// <summary>
/// Reads the four hexadecimal digits of a Unicode escape and returns the UTF-16 code unit
/// they encode. The leading \u must already have been consumed.
/// </summary>
private char ReadUnicodeEscape()
{
    int codeUnit = 0;
    int remaining = 4;
    while (remaining > 0)
    {
        char hex = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
        int digit;
        if ('0' <= hex && hex <= '9')
        {
            digit = hex - '0';
        }
        else if ('a' <= hex && hex <= 'f')
        {
            digit = 10 + (hex - 'a');
        }
        else if ('A' <= hex && hex <= 'F')
        {
            digit = 10 + (hex - 'A');
        }
        else
        {
            throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) hex));
        }
        // Shift the digits gathered so far up by one nybble and slot the new one in.
        codeUnit = (codeUnit << 4) | digit;
        remaining--;
    }
    return (char) codeUnit;
}
| 392 |
/// <summary>
/// Consumes the rest of a keyword literal, failing if the characters read
/// differ from <paramref name="text"/>. The first character of the literal must already
/// have been consumed, so matching starts at the second character.
/// </summary>
/// <param name="text">The complete literal expected, including its first character.</param>
private void ConsumeLiteral(string text)
{
    int index = 1;
    while (index < text.Length)
    {
        char? actual = reader.Read();
        if (!actual.HasValue)
        {
            throw reader.CreateException("Unexpected end of text while reading literal token " + text);
        }
        char expected = text[index];
        if (actual.Value != expected)
        {
            throw reader.CreateException("Unexpected character while reading literal token " + text);
        }
        index++;
    }
}
| 412 |
/// <summary>
/// Reads a numeric literal (integer part, optional fraction, optional exponent) and parses it as a double.
/// </summary>
/// <param name="initialCharacter">The first character of the number, already consumed by the caller:
/// either '-' or a digit.</param>
/// <returns>The parsed value; always a finite number.</returns>
/// <exception cref="InvalidJsonException">The literal is malformed, or its magnitude cannot be
/// represented as a finite double.</exception>
private double ReadNumber(char initialCharacter)
{
    StringBuilder builder = new StringBuilder();
    if (initialCharacter == '-')
    {
        builder.Append("-");
    }
    else
    {
        // A leading digit belongs to the integer part, so let ReadInt see it again.
        reader.PushBack(initialCharacter);
    }
    // Each method returns the character it read that doesn't belong in that part,
    // so we know what to do next, including pushing the character back at the end.
    // null is returned for "end of text".
    char? next = ReadInt(builder);
    if (next == '.')
    {
        next = ReadFrac(builder);
    }
    if (next == 'e' || next == 'E')
    {
        next = ReadExp(builder);
    }
    // If we read a character which wasn't part of the number, push it back so we can read it again
    // to parse the next token.
    if (next != null)
    {
        reader.PushBack(next.Value);
    }

    // TODO: What exception should we throw if the value can't be represented as a double?
    double result;
    try
    {
        result = double.Parse(builder.ToString(),
            NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
            CultureInfo.InvariantCulture);
    }
    catch (OverflowException)
    {
        // Older runtimes signal an out-of-range value by throwing.
        throw reader.CreateException("Numeric value out of range: " + builder);
    }

    // Newer runtimes (.NET Core 3.0 onwards) return positive or negative infinity instead of
    // throwing. The literal grammar read above cannot spell infinity, so an infinite result
    // can only mean overflow; report it the same way on every runtime.
    if (double.IsInfinity(result))
    {
        throw reader.CreateException("Numeric value out of range: " + builder);
    }
    return result;
}
| 455 |
/// <summary>
/// Reads the integer part of a number into <paramref name="builder"/>. At least one digit is
/// required, and a leading zero may not be followed by further digits.
/// </summary>
/// <returns>The first character after the integer part, or null at end of text.</returns>
private char? ReadInt(StringBuilder builder)
{
    char leadingDigit = reader.ReadOrFail("Invalid numeric literal");
    bool isDigit = leadingDigit >= '0' && leadingDigit <= '9';
    if (!isDigit)
    {
        throw reader.CreateException("Invalid numeric literal");
    }
    builder.Append(leadingDigit);

    int followingDigits;
    char? terminator = ConsumeDigits(builder, out followingDigits);
    if (followingDigits != 0 && leadingDigit == '0')
    {
        throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
    }
    return terminator;
}
| 472 |
/// <summary>
/// Reads the fractional part of a number into <paramref name="builder"/>. The decimal point
/// must already have been consumed, and at least one digit must follow it.
/// </summary>
/// <returns>The first character after the fraction, or null at end of text.</returns>
private char? ReadFrac(StringBuilder builder)
{
    // The caller consumed the '.', so record it here.
    builder.Append('.');

    int fractionDigits;
    char? terminator = ConsumeDigits(builder, out fractionDigits);
    if (fractionDigits != 0)
    {
        return terminator;
    }
    throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
}
| 484 |
/// <summary>
/// Reads the exponent part of a number into <paramref name="builder"/>. The 'e' or 'E' must
/// already have been consumed; an optional sign and at least one digit must follow.
/// </summary>
/// <returns>The first character after the exponent, or null at end of text.</returns>
private char? ReadExp(StringBuilder builder)
{
    // The caller consumed the exponent marker; it is always recorded as upper case.
    builder.Append('E');

    char signOrDigit = reader.ReadOrFail("Invalid numeric literal: exponent with no trailing digits");
    bool isSign = signOrDigit == '+' || signOrDigit == '-';
    if (isSign)
    {
        builder.Append(signOrDigit);
    }
    else
    {
        // Not a sign, so it should be the first exponent digit: let ConsumeDigits decide.
        reader.PushBack(signOrDigit);
    }

    int exponentDigits;
    char? terminator = ConsumeDigits(builder, out exponentDigits);
    if (exponentDigits == 0)
    {
        throw reader.CreateException("Invalid numeric literal: exponent without value");
    }
    return terminator;
}
| 509 |
/// <summary>
/// Appends consecutive ASCII digits from the reader to <paramref name="builder"/>.
/// </summary>
/// <param name="builder">Receives each digit read.</param>
/// <param name="count">Set to the number of digits appended.</param>
/// <returns>The first non-digit character read (which is consumed, not pushed back),
/// or null at end of text.</returns>
private char? ConsumeDigits(StringBuilder builder, out int count)
{
    count = 0;
    char? candidate = reader.Read();
    while (candidate != null && candidate.Value >= '0' && candidate.Value <= '9')
    {
        builder.Append(candidate.Value);
        count++;
        candidate = reader.Read();
    }
    return candidate;
}
| 524 |
/// <summary>
/// Checks that a value is permitted in the current state (failing with
/// <paramref name="errorPrefix"/> followed by the state otherwise), then moves to the state
/// that follows a completed value, e.g. from ObjectAfterColon to ObjectAfterProperty.
/// </summary>
private void ValidateAndModifyStateForValue(string errorPrefix)
{
    ValidateState(ValueStates, errorPrefix);
    if (state == State.StartOfDocument)
    {
        state = State.ExpectedEndOfDocument;
    }
    else if (state == State.ObjectAfterColon)
    {
        state = State.ObjectAfterProperty;
    }
    else if (state == State.ArrayStart || state == State.ArrayAfterComma)
    {
        state = State.ArrayAfterValue;
    }
    else
    {
        // Only reachable if ValueStates contains a state that is not handled above.
        throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
    }
}
| 548 |
/// <summary>
/// Removes the innermost container from the stack, then sets the state to the one that
/// follows a completed value inside the container that is now innermost.
/// </summary>
private void PopContainer()
{
    containerStack.Pop();
    ContainerType enclosing = containerStack.Peek();
    if (enclosing == ContainerType.Object)
    {
        state = State.ObjectAfterProperty;
    }
    else if (enclosing == ContainerType.Array)
    {
        state = State.ArrayAfterValue;
    }
    else if (enclosing == ContainerType.Document)
    {
        state = State.ExpectedEndOfDocument;
    }
    else
    {
        throw new InvalidOperationException("Unexpected container type: " + enclosing);
    }
}
| 572 |
/// <summary>
/// The kinds of container tracked on the container stack.
/// </summary>
private enum ContainerType
{
    // The whole document; pushed once by the constructor.
    Document,
    // Pushed when '{' is read.
    Object,
    // Pushed when '[' is read.
    Array
}
| 577 |
/// <summary>
/// Possible states of the tokenizer.
/// </summary>
/// <remarks>
/// <para>The tokenizer is only ever in one state at a time. This is a flags enum purely so that
/// a set of acceptable states can be represented, and checked, as a single value.</para>
/// <para>
/// Each state is illustrated with an example in which ^ marks the current position in the text.
/// The examples use string values, but any value (including a nested object or array) could appear.
/// The complete state of the tokenizer also includes the stack of enclosing containers.
/// "AfterValue" below is a notional state meaning "a value has just been completed"; it is
/// immediately replaced by ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue,
/// depending on the enclosing container.
/// </para>
/// <para>
/// These states were derived manually by reading RFC 7159 carefully.
/// </para>
/// </remarks>
[Flags]
private enum State
{
    /// <summary>
    /// ^ { "foo": "bar" }
    /// Before the top-level value of the document.
    /// Next states: ObjectStart, ArrayStart, "AfterValue"
    /// </summary>
    StartOfDocument = 0x001,
    /// <summary>
    /// { "foo": "bar" } ^
    /// After the top-level value of the document. Next states: ReaderExhausted
    /// </summary>
    ExpectedEndOfDocument = 0x002,
    /// <summary>
    /// { "foo": "bar" } ^ (with the reader already read to its end)
    /// Terminal state.
    /// </summary>
    ReaderExhausted = 0x004,
    /// <summary>
    /// { ^ "foo": "bar" }
    /// Immediately after the opening brace, before the *first* property of an object.
    /// Next states:
    /// "AfterValue" (closing brace read: empty object)
    /// ObjectBeforeColon (property name read)
    /// </summary>
    ObjectStart = 0x008,
    /// <summary>
    /// { "foo" ^ : "bar", "x": "y" }
    /// After a property name, so expecting a colon.
    /// Next state: ObjectAfterColon
    /// </summary>
    ObjectBeforeColon = 0x010,
    /// <summary>
    /// { "foo" : ^ "bar", "x": "y" }
    /// After the colon, so expecting the value of the property.
    /// Next states:
    /// "AfterValue" (value is simple)
    /// ObjectStart (value is object)
    /// ArrayStart (value is array)
    /// </summary>
    ObjectAfterColon = 0x020,
    /// <summary>
    /// { "foo" : "bar" ^ , "x" : "y" }
    /// After the value of a property, so expecting either a comma or the closing brace.
    /// Next states: ObjectAfterComma or "AfterValue"
    /// </summary>
    ObjectAfterProperty = 0x040,
    /// <summary>
    /// { "foo":"bar", ^ "x":"y" }
    /// After the comma that follows a property, so expecting another property name.
    /// This is like ObjectStart, except that a closing brace isn't valid here.
    /// Next state: ObjectBeforeColon.
    /// </summary>
    ObjectAfterComma = 0x080,
    /// <summary>
    /// [ ^ "foo", "bar" ]
    /// Immediately after the opening bracket, before the *first* value of an array.
    /// Next states:
    /// "AfterValue" (simple value read)
    /// ObjectStart, ArrayStart (nested container opened)
    /// "AfterValue" (closing bracket read: empty array; pops the stack)
    /// </summary>
    ArrayStart = 0x100,
    /// <summary>
    /// [ "foo" ^ , "bar" ]
    /// After a value in an array, so expecting either a comma or the closing bracket.
    /// Next states: ArrayAfterComma or "AfterValue"
    /// </summary>
    ArrayAfterValue = 0x200,
    /// <summary>
    /// [ "foo", ^ "bar" ]
    /// After a comma in an array, so there *must* be another value (simple or complex).
    /// Next states: "AfterValue" (simple value), ObjectStart, ArrayStart
    /// </summary>
    ArrayAfterComma = 0x400
}
| 671 |
/// <summary>
/// Thin wrapper over a text reader that adds a one-character push-back buffer and acts as the
/// factory for parse exceptions. Location tracking is not implemented yet (see the TODOs).
/// </summary>
private class PushBackReader
{
    // TODO: Add locations for errors etc.

    private readonly TextReader reader;

    /// <summary>
    /// The character most recently pushed back and not yet re-read, or null if there is none.
    /// </summary>
    private char? nextChar;

    internal PushBackReader(TextReader reader)
    {
        // TODO: Wrap the reader in a BufferedReader?
        this.reader = reader;
    }

    /// <summary>
    /// Reads one character, preferring a pushed-back character over the underlying reader.
    /// </summary>
    /// <returns>The character read, or null if the end of the text has been reached.</returns>
    internal char? Read()
    {
        if (nextChar == null)
        {
            int codeUnit = reader.Read();
            if (codeUnit == -1)
            {
                // The underlying reader reports end of text as -1.
                return null;
            }
            return (char?) codeUnit;
        }
        char? buffered = nextChar;
        nextChar = null;
        return buffered;
    }

    /// <summary>
    /// Reads one character, treating the end of the text as an error.
    /// </summary>
    /// <param name="messageOnFailure">Message of the exception thrown at end of text.</param>
    internal char ReadOrFail(string messageOnFailure)
    {
        char? result = Read();
        if (result.HasValue)
        {
            return result.Value;
        }
        throw CreateException(messageOnFailure);
    }

    /// <summary>
    /// Makes <paramref name="c"/> the next character returned by <see cref="Read"/>.
    /// Only one character can be pushed back at a time.
    /// </summary>
    /// <exception cref="InvalidOperationException">A character is already buffered.</exception>
    internal void PushBack(char c)
    {
        if (nextChar.HasValue)
        {
            throw new InvalidOperationException("Cannot push back when already buffering a character");
        }
        nextChar = c;
    }

    /// <summary>
    /// Builds (but does not throw) the exception used to report invalid JSON.
    /// </summary>
    internal InvalidJsonException CreateException(string message)
    {
        // TODO: Keep track of and use the location.
        InvalidJsonException exception = new InvalidJsonException(message);
        return exception;
    }
}
| 736 } |
| 737 } |
| 738 } |
OLD | NEW |