OLD | NEW |
(Empty) | |
| 1 // Copyright 2007-2011 Baptiste Lepilleur |
| 2 // Distributed under MIT license, or public domain if desired and |
| 3 // recognized in your jurisdiction. |
| 4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE |
| 5 |
| 6 #if !defined(JSON_IS_AMALGAMATION) |
| 7 # include <json/assertions.h> |
| 8 # include <json/reader.h> |
| 9 # include <json/value.h> |
| 10 # include "json_tool.h" |
| 11 #endif // if !defined(JSON_IS_AMALGAMATION) |
#include <cassert>
#include <cstdio>
#include <cstring>
#include <istream>
#include <iterator>
#include <stdexcept>
#include <utility>
| 17 #ifdef __pnacl__ |
| 18 // This file uses the following headers (at least in Reader::parse), but |
| 19 // the upstream version doesn't include them because iostream pulls in |
| 20 // static initializers. This breaks the PNaCl build because it uses |
| 21 // libc++ which declares getline in <string> (as per the C++ standard) |
| 22 // but defines it in <iostream>. The code therefore fails linking, which |
| 23 // these includes fix. |
| 24 #include <string> |
| 25 #include <iostream> |
| 26 #endif |
| 27 |
| 28 #if _MSC_VER >= 1400 // VC++ 8.0 |
| 29 #pragma warning( disable : 4996 ) // disable warning about strdup being deprec
ated. |
| 30 #endif |
| 31 |
| 32 namespace Json { |
| 33 |
| 34 // Implementation of class Features |
| 35 // //////////////////////////////// |
| 36 |
| 37 Features::Features() |
| 38 : allowComments_( true ) |
| 39 , strictRoot_( false ) |
| 40 { |
| 41 } |
| 42 |
| 43 |
| 44 Features |
| 45 Features::all() |
| 46 { |
| 47 return Features(); |
| 48 } |
| 49 |
| 50 |
| 51 Features |
| 52 Features::strictMode() |
| 53 { |
| 54 Features features; |
| 55 features.allowComments_ = false; |
| 56 features.strictRoot_ = true; |
| 57 return features; |
| 58 } |
| 59 |
| 60 // Implementation of class Reader |
| 61 // //////////////////////////////// |
| 62 |
| 63 |
| 64 static inline bool |
| 65 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::C
har c4 ) |
| 66 { |
| 67 return c == c1 || c == c2 || c == c3 || c == c4; |
| 68 } |
| 69 |
| 70 static inline bool |
| 71 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::C
har c4, Reader::Char c5 ) |
| 72 { |
| 73 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5; |
| 74 } |
| 75 |
| 76 |
| 77 static bool |
| 78 containsNewLine( Reader::Location begin, |
| 79 Reader::Location end ) |
| 80 { |
| 81 for ( ;begin < end; ++begin ) |
| 82 if ( *begin == '\n' || *begin == '\r' ) |
| 83 return true; |
| 84 return false; |
| 85 } |
| 86 |
| 87 |
| 88 // Class Reader |
| 89 // ////////////////////////////////////////////////////////////////// |
| 90 |
| 91 Reader::Reader() |
| 92 : errors_(), |
| 93 document_(), |
| 94 begin_(), |
| 95 end_(), |
| 96 current_(), |
| 97 lastValueEnd_(), |
| 98 lastValue_(), |
| 99 commentsBefore_(), |
| 100 features_( Features::all() ), |
| 101 collectComments_() |
| 102 { |
| 103 } |
| 104 |
| 105 |
| 106 Reader::Reader( const Features &features ) |
| 107 : errors_(), |
| 108 document_(), |
| 109 begin_(), |
| 110 end_(), |
| 111 current_(), |
| 112 lastValueEnd_(), |
| 113 lastValue_(), |
| 114 commentsBefore_(), |
| 115 features_( features ), |
| 116 collectComments_() |
| 117 { |
| 118 } |
| 119 |
| 120 |
| 121 bool |
| 122 Reader::parse( const std::string &document, |
| 123 Value &root, |
| 124 bool collectComments ) |
| 125 { |
| 126 document_ = document; |
| 127 const char *begin = document_.c_str(); |
| 128 const char *end = begin + document_.length(); |
| 129 return parse( begin, end, root, collectComments ); |
| 130 } |
| 131 |
| 132 |
| 133 bool |
| 134 Reader::parse( std::istream& sin, |
| 135 Value &root, |
| 136 bool collectComments ) |
| 137 { |
| 138 //std::istream_iterator<char> begin(sin); |
| 139 //std::istream_iterator<char> end; |
| 140 // Those would allow streamed input from a file, if parse() were a |
| 141 // template function. |
| 142 |
| 143 // Since std::string is reference-counted, this at least does not |
| 144 // create an extra copy. |
| 145 std::string doc; |
| 146 std::getline(sin, doc, (char)EOF); |
| 147 return parse( doc, root, collectComments ); |
| 148 } |
| 149 |
| 150 bool |
| 151 Reader::parse( const char *beginDoc, const char *endDoc, |
| 152 Value &root, |
| 153 bool collectComments ) |
| 154 { |
| 155 if ( !features_.allowComments_ ) |
| 156 { |
| 157 collectComments = false; |
| 158 } |
| 159 |
| 160 begin_ = beginDoc; |
| 161 end_ = endDoc; |
| 162 collectComments_ = collectComments; |
| 163 current_ = begin_; |
| 164 lastValueEnd_ = 0; |
| 165 lastValue_ = 0; |
| 166 commentsBefore_ = ""; |
| 167 errors_.clear(); |
| 168 while ( !nodes_.empty() ) |
| 169 nodes_.pop(); |
| 170 nodes_.push( &root ); |
| 171 |
| 172 bool successful = readValue(); |
| 173 Token token; |
| 174 skipCommentTokens( token ); |
| 175 if ( collectComments_ && !commentsBefore_.empty() ) |
| 176 root.setComment( commentsBefore_, commentAfter ); |
| 177 if ( features_.strictRoot_ ) |
| 178 { |
| 179 if ( !root.isArray() && !root.isObject() ) |
| 180 { |
| 181 // Set error location to start of doc, ideally should be first token fo
und in doc |
| 182 token.type_ = tokenError; |
| 183 token.start_ = beginDoc; |
| 184 token.end_ = endDoc; |
| 185 addError( "A valid JSON document must be either an array or an object v
alue.", |
| 186 token ); |
| 187 return false; |
| 188 } |
| 189 } |
| 190 return successful; |
| 191 } |
| 192 |
| 193 |
| 194 bool |
| 195 Reader::readValue() |
| 196 { |
| 197 Token token; |
| 198 skipCommentTokens( token ); |
| 199 bool successful = true; |
| 200 |
| 201 if ( collectComments_ && !commentsBefore_.empty() ) |
| 202 { |
| 203 currentValue().setComment( commentsBefore_, commentBefore ); |
| 204 commentsBefore_ = ""; |
| 205 } |
| 206 |
| 207 |
| 208 switch ( token.type_ ) |
| 209 { |
| 210 case tokenObjectBegin: |
| 211 successful = readObject( token ); |
| 212 break; |
| 213 case tokenArrayBegin: |
| 214 successful = readArray( token ); |
| 215 break; |
| 216 case tokenNumber: |
| 217 successful = decodeNumber( token ); |
| 218 break; |
| 219 case tokenString: |
| 220 successful = decodeString( token ); |
| 221 break; |
| 222 case tokenTrue: |
| 223 currentValue() = true; |
| 224 break; |
| 225 case tokenFalse: |
| 226 currentValue() = false; |
| 227 break; |
| 228 case tokenNull: |
| 229 currentValue() = Value(); |
| 230 break; |
| 231 default: |
| 232 return addError( "Syntax error: value, object or array expected.", token )
; |
| 233 } |
| 234 |
| 235 if ( collectComments_ ) |
| 236 { |
| 237 lastValueEnd_ = current_; |
| 238 lastValue_ = ¤tValue(); |
| 239 } |
| 240 |
| 241 return successful; |
| 242 } |
| 243 |
| 244 |
| 245 void |
| 246 Reader::skipCommentTokens( Token &token ) |
| 247 { |
| 248 if ( features_.allowComments_ ) |
| 249 { |
| 250 do |
| 251 { |
| 252 readToken( token ); |
| 253 } |
| 254 while ( token.type_ == tokenComment ); |
| 255 } |
| 256 else |
| 257 { |
| 258 readToken( token ); |
| 259 } |
| 260 } |
| 261 |
| 262 |
| 263 bool |
| 264 Reader::expectToken( TokenType type, Token &token, const char *message ) |
| 265 { |
| 266 readToken( token ); |
| 267 if ( token.type_ != type ) |
| 268 return addError( message, token ); |
| 269 return true; |
| 270 } |
| 271 |
| 272 |
| 273 bool |
| 274 Reader::readToken( Token &token ) |
| 275 { |
| 276 skipSpaces(); |
| 277 token.start_ = current_; |
| 278 Char c = getNextChar(); |
| 279 bool ok = true; |
| 280 switch ( c ) |
| 281 { |
| 282 case '{': |
| 283 token.type_ = tokenObjectBegin; |
| 284 break; |
| 285 case '}': |
| 286 token.type_ = tokenObjectEnd; |
| 287 break; |
| 288 case '[': |
| 289 token.type_ = tokenArrayBegin; |
| 290 break; |
| 291 case ']': |
| 292 token.type_ = tokenArrayEnd; |
| 293 break; |
| 294 case '"': |
| 295 token.type_ = tokenString; |
| 296 ok = readString(); |
| 297 break; |
| 298 case '/': |
| 299 token.type_ = tokenComment; |
| 300 ok = readComment(); |
| 301 break; |
| 302 case '0': |
| 303 case '1': |
| 304 case '2': |
| 305 case '3': |
| 306 case '4': |
| 307 case '5': |
| 308 case '6': |
| 309 case '7': |
| 310 case '8': |
| 311 case '9': |
| 312 case '-': |
| 313 token.type_ = tokenNumber; |
| 314 readNumber(); |
| 315 break; |
| 316 case 't': |
| 317 token.type_ = tokenTrue; |
| 318 ok = match( "rue", 3 ); |
| 319 break; |
| 320 case 'f': |
| 321 token.type_ = tokenFalse; |
| 322 ok = match( "alse", 4 ); |
| 323 break; |
| 324 case 'n': |
| 325 token.type_ = tokenNull; |
| 326 ok = match( "ull", 3 ); |
| 327 break; |
| 328 case ',': |
| 329 token.type_ = tokenArraySeparator; |
| 330 break; |
| 331 case ':': |
| 332 token.type_ = tokenMemberSeparator; |
| 333 break; |
| 334 case 0: |
| 335 token.type_ = tokenEndOfStream; |
| 336 break; |
| 337 default: |
| 338 ok = false; |
| 339 break; |
| 340 } |
| 341 if ( !ok ) |
| 342 token.type_ = tokenError; |
| 343 token.end_ = current_; |
| 344 return true; |
| 345 } |
| 346 |
| 347 |
| 348 void |
| 349 Reader::skipSpaces() |
| 350 { |
| 351 while ( current_ != end_ ) |
| 352 { |
| 353 Char c = *current_; |
| 354 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) |
| 355 ++current_; |
| 356 else |
| 357 break; |
| 358 } |
| 359 } |
| 360 |
| 361 |
| 362 bool |
| 363 Reader::match( Location pattern, |
| 364 int patternLength ) |
| 365 { |
| 366 if ( end_ - current_ < patternLength ) |
| 367 return false; |
| 368 int index = patternLength; |
| 369 while ( index-- ) |
| 370 if ( current_[index] != pattern[index] ) |
| 371 return false; |
| 372 current_ += patternLength; |
| 373 return true; |
| 374 } |
| 375 |
| 376 |
| 377 bool |
| 378 Reader::readComment() |
| 379 { |
| 380 Location commentBegin = current_ - 1; |
| 381 Char c = getNextChar(); |
| 382 bool successful = false; |
| 383 if ( c == '*' ) |
| 384 successful = readCStyleComment(); |
| 385 else if ( c == '/' ) |
| 386 successful = readCppStyleComment(); |
| 387 if ( !successful ) |
| 388 return false; |
| 389 |
| 390 if ( collectComments_ ) |
| 391 { |
| 392 CommentPlacement placement = commentBefore; |
| 393 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) ) |
| 394 { |
| 395 if ( c != '*' || !containsNewLine( commentBegin, current_ ) ) |
| 396 placement = commentAfterOnSameLine; |
| 397 } |
| 398 |
| 399 addComment( commentBegin, current_, placement ); |
| 400 } |
| 401 return true; |
| 402 } |
| 403 |
| 404 |
| 405 void |
| 406 Reader::addComment( Location begin, |
| 407 Location end, |
| 408 CommentPlacement placement ) |
| 409 { |
| 410 assert( collectComments_ ); |
| 411 if ( placement == commentAfterOnSameLine ) |
| 412 { |
| 413 assert( lastValue_ != 0 ); |
| 414 lastValue_->setComment( std::string( begin, end ), placement ); |
| 415 } |
| 416 else |
| 417 { |
| 418 if ( !commentsBefore_.empty() ) |
| 419 commentsBefore_ += "\n"; |
| 420 commentsBefore_ += std::string( begin, end ); |
| 421 } |
| 422 } |
| 423 |
| 424 |
| 425 bool |
| 426 Reader::readCStyleComment() |
| 427 { |
| 428 while ( current_ != end_ ) |
| 429 { |
| 430 Char c = getNextChar(); |
| 431 if ( c == '*' && *current_ == '/' ) |
| 432 break; |
| 433 } |
| 434 return getNextChar() == '/'; |
| 435 } |
| 436 |
| 437 |
| 438 bool |
| 439 Reader::readCppStyleComment() |
| 440 { |
| 441 while ( current_ != end_ ) |
| 442 { |
| 443 Char c = getNextChar(); |
| 444 if ( c == '\r' || c == '\n' ) |
| 445 break; |
| 446 } |
| 447 return true; |
| 448 } |
| 449 |
| 450 |
| 451 void |
| 452 Reader::readNumber() |
| 453 { |
| 454 while ( current_ != end_ ) |
| 455 { |
| 456 if ( !(*current_ >= '0' && *current_ <= '9') && |
| 457 !in( *current_, '.', 'e', 'E', '+', '-' ) ) |
| 458 break; |
| 459 ++current_; |
| 460 } |
| 461 } |
| 462 |
| 463 bool |
| 464 Reader::readString() |
| 465 { |
| 466 Char c = 0; |
| 467 while ( current_ != end_ ) |
| 468 { |
| 469 c = getNextChar(); |
| 470 if ( c == '\\' ) |
| 471 getNextChar(); |
| 472 else if ( c == '"' ) |
| 473 break; |
| 474 } |
| 475 return c == '"'; |
| 476 } |
| 477 |
| 478 |
| 479 bool |
| 480 Reader::readObject( Token &/*tokenStart*/ ) |
| 481 { |
| 482 Token tokenName; |
| 483 std::string name; |
| 484 currentValue() = Value( objectValue ); |
| 485 while ( readToken( tokenName ) ) |
| 486 { |
| 487 bool initialTokenOk = true; |
| 488 while ( tokenName.type_ == tokenComment && initialTokenOk ) |
| 489 initialTokenOk = readToken( tokenName ); |
| 490 if ( !initialTokenOk ) |
| 491 break; |
| 492 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty objec
t |
| 493 return true; |
| 494 if ( tokenName.type_ != tokenString ) |
| 495 break; |
| 496 |
| 497 name = ""; |
| 498 if ( !decodeString( tokenName, name ) ) |
| 499 return recoverFromError( tokenObjectEnd ); |
| 500 |
| 501 Token colon; |
| 502 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator ) |
| 503 { |
| 504 return addErrorAndRecover( "Missing ':' after object member name", |
| 505 colon, |
| 506 tokenObjectEnd ); |
| 507 } |
| 508 Value &value = currentValue()[ name ]; |
| 509 nodes_.push( &value ); |
| 510 bool ok = readValue(); |
| 511 nodes_.pop(); |
| 512 if ( !ok ) // error already set |
| 513 return recoverFromError( tokenObjectEnd ); |
| 514 |
| 515 Token comma; |
| 516 if ( !readToken( comma ) |
| 517 || ( comma.type_ != tokenObjectEnd && |
| 518 comma.type_ != tokenArraySeparator && |
| 519 comma.type_ != tokenComment ) ) |
| 520 { |
| 521 return addErrorAndRecover( "Missing ',' or '}' in object declaration", |
| 522 comma, |
| 523 tokenObjectEnd ); |
| 524 } |
| 525 bool finalizeTokenOk = true; |
| 526 while ( comma.type_ == tokenComment && |
| 527 finalizeTokenOk ) |
| 528 finalizeTokenOk = readToken( comma ); |
| 529 if ( comma.type_ == tokenObjectEnd ) |
| 530 return true; |
| 531 } |
| 532 return addErrorAndRecover( "Missing '}' or object member name", |
| 533 tokenName, |
| 534 tokenObjectEnd ); |
| 535 } |
| 536 |
| 537 |
| 538 bool |
| 539 Reader::readArray( Token &/*tokenStart*/ ) |
| 540 { |
| 541 currentValue() = Value( arrayValue ); |
| 542 skipSpaces(); |
| 543 if ( *current_ == ']' ) // empty array |
| 544 { |
| 545 Token endArray; |
| 546 readToken( endArray ); |
| 547 return true; |
| 548 } |
| 549 int index = 0; |
| 550 for (;;) |
| 551 { |
| 552 Value &value = currentValue()[ index++ ]; |
| 553 nodes_.push( &value ); |
| 554 bool ok = readValue(); |
| 555 nodes_.pop(); |
| 556 if ( !ok ) // error already set |
| 557 return recoverFromError( tokenArrayEnd ); |
| 558 |
| 559 Token token; |
| 560 // Accept Comment after last item in the array. |
| 561 ok = readToken( token ); |
| 562 while ( token.type_ == tokenComment && ok ) |
| 563 { |
| 564 ok = readToken( token ); |
| 565 } |
| 566 bool badTokenType = ( token.type_ != tokenArraySeparator && |
| 567 token.type_ != tokenArrayEnd ); |
| 568 if ( !ok || badTokenType ) |
| 569 { |
| 570 return addErrorAndRecover( "Missing ',' or ']' in array declaration", |
| 571 token, |
| 572 tokenArrayEnd ); |
| 573 } |
| 574 if ( token.type_ == tokenArrayEnd ) |
| 575 break; |
| 576 } |
| 577 return true; |
| 578 } |
| 579 |
| 580 |
| 581 bool |
| 582 Reader::decodeNumber( Token &token ) |
| 583 { |
| 584 bool isDouble = false; |
| 585 for ( Location inspect = token.start_; inspect != token.end_; ++inspect ) |
| 586 { |
| 587 isDouble = isDouble |
| 588 || in( *inspect, '.', 'e', 'E', '+' ) |
| 589 || ( *inspect == '-' && inspect != token.start_ ); |
| 590 } |
| 591 if ( isDouble ) |
| 592 return decodeDouble( token ); |
| 593 // Attempts to parse the number as an integer. If the number is |
| 594 // larger than the maximum supported value of an integer then |
| 595 // we decode the number as a double. |
| 596 Location current = token.start_; |
| 597 bool isNegative = *current == '-'; |
| 598 if ( isNegative ) |
| 599 ++current; |
| 600 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::
minLargestInt) |
| 601 : Value::maxLargestUInt; |
| 602 Value::LargestUInt threshold = maxIntegerValue / 10; |
| 603 Value::LargestUInt value = 0; |
| 604 while ( current < token.end_ ) |
| 605 { |
| 606 Char c = *current++; |
| 607 if ( c < '0' || c > '9' ) |
| 608 return addError( "'" + std::string( token.start_, token.end_ ) + "' is
not a number.", token ); |
| 609 Value::UInt digit(c - '0'); |
| 610 if ( value >= threshold ) |
| 611 { |
| 612 // We've hit or exceeded the max value divided by 10 (rounded down). If |
| 613 // a) we've only just touched the limit, b) this is the last digit, and |
| 614 // c) it's small enough to fit in that rounding delta, we're okay. |
| 615 // Otherwise treat this number as a double to avoid overflow. |
| 616 if (value > threshold || |
| 617 current != token.end_ || |
| 618 digit > maxIntegerValue % 10) |
| 619 { |
| 620 return decodeDouble( token ); |
| 621 } |
| 622 } |
| 623 value = value * 10 + digit; |
| 624 } |
| 625 if ( isNegative ) |
| 626 currentValue() = -Value::LargestInt( value ); |
| 627 else if ( value <= Value::LargestUInt(Value::maxInt) ) |
| 628 currentValue() = Value::LargestInt( value ); |
| 629 else |
| 630 currentValue() = value; |
| 631 return true; |
| 632 } |
| 633 |
| 634 |
| 635 bool |
| 636 Reader::decodeDouble( Token &token ) |
| 637 { |
| 638 double value = 0; |
| 639 const int bufferSize = 32; |
| 640 int count; |
| 641 int length = int(token.end_ - token.start_); |
| 642 |
| 643 // Sanity check to avoid buffer overflow exploits. |
| 644 if (length < 0) { |
| 645 return addError( "Unable to parse token length", token ); |
| 646 } |
| 647 |
| 648 // Avoid using a string constant for the format control string given to |
| 649 // sscanf, as this can cause hard to debug crashes on OS X. See here for more |
| 650 // info: |
| 651 // |
| 652 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/g
cc-4.0.1/gcc/Incompatibilities.html |
| 653 char format[] = "%lf"; |
| 654 |
| 655 if ( length <= bufferSize ) |
| 656 { |
| 657 Char buffer[bufferSize+1]; |
| 658 memcpy( buffer, token.start_, length ); |
| 659 buffer[length] = 0; |
| 660 count = sscanf( buffer, format, &value ); |
| 661 } |
| 662 else |
| 663 { |
| 664 std::string buffer( token.start_, token.end_ ); |
| 665 count = sscanf( buffer.c_str(), format, &value ); |
| 666 } |
| 667 |
| 668 if ( count != 1 ) |
| 669 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not
a number.", token ); |
| 670 currentValue() = value; |
| 671 return true; |
| 672 } |
| 673 |
| 674 |
| 675 bool |
| 676 Reader::decodeString( Token &token ) |
| 677 { |
| 678 std::string decoded; |
| 679 if ( !decodeString( token, decoded ) ) |
| 680 return false; |
| 681 currentValue() = decoded; |
| 682 return true; |
| 683 } |
| 684 |
| 685 |
| 686 bool |
| 687 Reader::decodeString( Token &token, std::string &decoded ) |
| 688 { |
| 689 decoded.reserve( token.end_ - token.start_ - 2 ); |
| 690 Location current = token.start_ + 1; // skip '"' |
| 691 Location end = token.end_ - 1; // do not include '"' |
| 692 while ( current != end ) |
| 693 { |
| 694 Char c = *current++; |
| 695 if ( c == '"' ) |
| 696 break; |
| 697 else if ( c == '\\' ) |
| 698 { |
| 699 if ( current == end ) |
| 700 return addError( "Empty escape sequence in string", token, current )
; |
| 701 Char escape = *current++; |
| 702 switch ( escape ) |
| 703 { |
| 704 case '"': decoded += '"'; break; |
| 705 case '/': decoded += '/'; break; |
| 706 case '\\': decoded += '\\'; break; |
| 707 case 'b': decoded += '\b'; break; |
| 708 case 'f': decoded += '\f'; break; |
| 709 case 'n': decoded += '\n'; break; |
| 710 case 'r': decoded += '\r'; break; |
| 711 case 't': decoded += '\t'; break; |
| 712 case 'u': |
| 713 { |
| 714 unsigned int unicode; |
| 715 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) ) |
| 716 return false; |
| 717 decoded += codePointToUTF8(unicode); |
| 718 } |
| 719 break; |
| 720 default: |
| 721 return addError( "Bad escape sequence in string", token, current ); |
| 722 } |
| 723 } |
| 724 else |
| 725 { |
| 726 decoded += c; |
| 727 } |
| 728 } |
| 729 return true; |
| 730 } |
| 731 |
| 732 bool |
| 733 Reader::decodeUnicodeCodePoint( Token &token, |
| 734 Location ¤t, |
| 735 Location end, |
| 736 unsigned int &unicode ) |
| 737 { |
| 738 |
| 739 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) ) |
| 740 return false; |
| 741 if (unicode >= 0xD800 && unicode <= 0xDBFF) |
| 742 { |
| 743 // surrogate pairs |
| 744 if (end - current < 6) |
| 745 return addError( "additional six characters expected to parse unicode s
urrogate pair.", token, current ); |
| 746 unsigned int surrogatePair; |
| 747 if (*(current++) == '\\' && *(current++)== 'u') |
| 748 { |
| 749 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair )) |
| 750 { |
| 751 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3
FF); |
| 752 } |
| 753 else |
| 754 return false; |
| 755 } |
| 756 else |
| 757 return addError( "expecting another \\u token to begin the second half
of a unicode surrogate pair", token, current ); |
| 758 } |
| 759 return true; |
| 760 } |
| 761 |
| 762 bool |
| 763 Reader::decodeUnicodeEscapeSequence( Token &token, |
| 764 Location ¤t, |
| 765 Location end, |
| 766 unsigned int &unicode ) |
| 767 { |
| 768 if ( end - current < 4 ) |
| 769 return addError( "Bad unicode escape sequence in string: four digits expec
ted.", token, current ); |
| 770 unicode = 0; |
| 771 for ( int index =0; index < 4; ++index ) |
| 772 { |
| 773 Char c = *current++; |
| 774 unicode *= 16; |
| 775 if ( c >= '0' && c <= '9' ) |
| 776 unicode += c - '0'; |
| 777 else if ( c >= 'a' && c <= 'f' ) |
| 778 unicode += c - 'a' + 10; |
| 779 else if ( c >= 'A' && c <= 'F' ) |
| 780 unicode += c - 'A' + 10; |
| 781 else |
| 782 return addError( "Bad unicode escape sequence in string: hexadecimal di
git expected.", token, current ); |
| 783 } |
| 784 return true; |
| 785 } |
| 786 |
| 787 |
| 788 bool |
| 789 Reader::addError( const std::string &message, |
| 790 Token &token, |
| 791 Location extra ) |
| 792 { |
| 793 ErrorInfo info; |
| 794 info.token_ = token; |
| 795 info.message_ = message; |
| 796 info.extra_ = extra; |
| 797 errors_.push_back( info ); |
| 798 return false; |
| 799 } |
| 800 |
| 801 |
| 802 bool |
| 803 Reader::recoverFromError( TokenType skipUntilToken ) |
| 804 { |
| 805 int errorCount = int(errors_.size()); |
| 806 Token skip; |
| 807 for (;;) |
| 808 { |
| 809 if ( !readToken(skip) ) |
| 810 errors_.resize( errorCount ); // discard errors caused by recovery |
| 811 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream ) |
| 812 break; |
| 813 } |
| 814 errors_.resize( errorCount ); |
| 815 return false; |
| 816 } |
| 817 |
| 818 |
| 819 bool |
| 820 Reader::addErrorAndRecover( const std::string &message, |
| 821 Token &token, |
| 822 TokenType skipUntilToken ) |
| 823 { |
| 824 addError( message, token ); |
| 825 return recoverFromError( skipUntilToken ); |
| 826 } |
| 827 |
| 828 |
| 829 Value & |
| 830 Reader::currentValue() |
| 831 { |
| 832 return *(nodes_.top()); |
| 833 } |
| 834 |
| 835 |
| 836 Reader::Char |
| 837 Reader::getNextChar() |
| 838 { |
| 839 if ( current_ == end_ ) |
| 840 return 0; |
| 841 return *current_++; |
| 842 } |
| 843 |
| 844 |
| 845 void |
| 846 Reader::getLocationLineAndColumn( Location location, |
| 847 int &line, |
| 848 int &column ) const |
| 849 { |
| 850 Location current = begin_; |
| 851 Location lastLineStart = current; |
| 852 line = 0; |
| 853 while ( current < location && current != end_ ) |
| 854 { |
| 855 Char c = *current++; |
| 856 if ( c == '\r' ) |
| 857 { |
| 858 if ( *current == '\n' ) |
| 859 ++current; |
| 860 lastLineStart = current; |
| 861 ++line; |
| 862 } |
| 863 else if ( c == '\n' ) |
| 864 { |
| 865 lastLineStart = current; |
| 866 ++line; |
| 867 } |
| 868 } |
| 869 // column & line start at 1 |
| 870 column = int(location - lastLineStart) + 1; |
| 871 ++line; |
| 872 } |
| 873 |
| 874 |
| 875 std::string |
| 876 Reader::getLocationLineAndColumn( Location location ) const |
| 877 { |
| 878 int line, column; |
| 879 getLocationLineAndColumn( location, line, column ); |
| 880 char buffer[18+16+16+1]; |
| 881 sprintf( buffer, "Line %d, Column %d", line, column ); |
| 882 return buffer; |
| 883 } |
| 884 |
| 885 |
| 886 // Deprecated. Preserved for backward compatibility |
| 887 std::string |
| 888 Reader::getFormatedErrorMessages() const |
| 889 { |
| 890 return getFormattedErrorMessages(); |
| 891 } |
| 892 |
| 893 |
| 894 std::string |
| 895 Reader::getFormattedErrorMessages() const |
| 896 { |
| 897 std::string formattedMessage; |
| 898 for ( Errors::const_iterator itError = errors_.begin(); |
| 899 itError != errors_.end(); |
| 900 ++itError ) |
| 901 { |
| 902 const ErrorInfo &error = *itError; |
| 903 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ )
+ "\n"; |
| 904 formattedMessage += " " + error.message_ + "\n"; |
| 905 if ( error.extra_ ) |
| 906 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) +
" for detail.\n"; |
| 907 } |
| 908 return formattedMessage; |
| 909 } |
| 910 |
| 911 |
| 912 std::istream& operator>>( std::istream &sin, Value &root ) |
| 913 { |
| 914 Json::Reader reader; |
| 915 bool ok = reader.parse(sin, root, true); |
| 916 if (!ok) { |
| 917 fprintf( |
| 918 stderr, |
| 919 "Error from reader: %s", |
| 920 reader.getFormattedErrorMessages().c_str()); |
| 921 |
| 922 JSON_FAIL_MESSAGE("reader error"); |
| 923 } |
| 924 return sin; |
| 925 } |
| 926 |
| 927 |
| 928 } // namespace Json |
OLD | NEW |