| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/json_reader.h" | 5 #include "base/json_reader.h" |
| 6 | 6 |
| 7 #include "base/float_util.h" | 7 #include "base/float_util.h" |
| 8 #include "base/logging.h" | 8 #include "base/logging.h" |
| 9 #include "base/string_util.h" | 9 #include "base/string_util.h" |
| 10 #include "base/values.h" | 10 #include "base/values.h" |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 64 return false; | 64 return false; |
| 65 } | 65 } |
| 66 } | 66 } |
| 67 | 67 |
| 68 token.length += digits; | 68 token.length += digits; |
| 69 return true; | 69 return true; |
| 70 } | 70 } |
| 71 | 71 |
| 72 } // anonymous namespace | 72 } // anonymous namespace |
| 73 | 73 |
| 74 const char* JSONReader::kBadRootElementType = |
| 75 "Root value must be an array or object."; |
| 76 const char* JSONReader::kInvalidEscape = |
| 77 "Invalid escape sequence."; |
| 78 const char* JSONReader::kSyntaxError = |
| 79 "Syntax error."; |
| 80 const char* JSONReader::kTrailingComma = |
| 81 "Trailing comma not allowed."; |
| 82 const char* JSONReader::kTooMuchNesting = |
| 83 "Too much nesting."; |
| 84 const char* JSONReader::kUnexpectedDataAfterRoot = |
| 85 "Unexpected data after root element."; |
| 86 const char* JSONReader::kUnsupportedEncoding = |
| 87 "Unsupported encoding. JSON must be UTF-8."; |
| 88 const char* JSONReader::kUnquotedDictionaryKey = |
| 89 "Dictionary keys must be quoted."; |
| 90 |
| 74 /* static */ | 91 /* static */ |
| 75 bool JSONReader::Read(const std::string& json, | 92 bool JSONReader::Read(const std::string& json, |
| 76 Value** root, | 93 Value** root, |
| 77 bool allow_trailing_comma) { | 94 bool allow_trailing_comma) { |
| 78 return JsonToValue(json, root, true, allow_trailing_comma); | 95 return ReadAndReturnError(json, root, allow_trailing_comma, NULL); |
| 79 } | 96 } |
| 80 | 97 |
| 81 /* static */ | 98 /* static */ |
| 82 bool JSONReader::JsonToValue(const std::string& json, | 99 bool JSONReader::ReadAndReturnError(const std::string& json, |
| 83 Value** root, | 100 Value** root, |
| 84 bool check_root, | 101 bool allow_trailing_comma, |
| 85 bool allow_trailing_comma) { | 102 std::string *error_message_out) { |
| 103 JSONReader reader = JSONReader(); |
| 104 if (reader.JsonToValue(json, root, true, allow_trailing_comma)) { |
| 105 return true; |
| 106 } else { |
| 107 if (error_message_out) |
| 108 *error_message_out = *reader.error_message(); |
| 109 return false; |
| 110 } |
| 111 } |
| 112 |
| 113 /* static */ |
| 114 std::string JSONReader::FormatErrorMessage(int line, int column, |
| 115 const char* description) { |
| 116 return StringPrintf("Line: %i, column: %i, %s", |
| 117 line, column, description); |
| 118 } |
| 119 |
| 120 JSONReader::JSONReader() |
| 121 : start_pos_(NULL), json_pos_(NULL), stack_depth_(0), |
| 122 allow_trailing_comma_(false) {} |
| 123 |
| 124 bool JSONReader::JsonToValue(const std::string& json, Value** root, |
| 125 bool check_root, bool allow_trailing_comma) { |
| 86 // The input must be in UTF-8. | 126 // The input must be in UTF-8. |
| 87 if (!IsStringUTF8(json.c_str())) | 127 if (!IsStringUTF8(json.c_str())) { |
| 128 error_message_ = kUnsupportedEncoding; |
| 88 return false; | 129 return false; |
| 130 } |
| 131 |
| 89 // The conversion from UTF8 to wstring removes null bytes for us | 132 // The conversion from UTF8 to wstring removes null bytes for us |
| 90 // (a good thing). | 133 // (a good thing). |
| 91 std::wstring json_wide(UTF8ToWide(json)); | 134 std::wstring json_wide(UTF8ToWide(json)); |
| 92 const wchar_t* json_cstr = json_wide.c_str(); | 135 start_pos_ = json_wide.c_str(); |
| 93 | 136 |
| 94 // When the input JSON string starts with a UTF-8 Byte-Order-Mark | 137 // When the input JSON string starts with a UTF-8 Byte-Order-Mark |
| 95 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode | 138 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode |
| 96 // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from | 139 // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from |
| 97 // mis-treating a Unicode BOM as an invalid character and returning false, | 140 // mis-treating a Unicode BOM as an invalid character and returning false, |
| 98 // skip a converted Unicode BOM if it exists. | 141 // skip a converted Unicode BOM if it exists. |
| 99 if (!json_wide.empty() && json_cstr[0] == 0xFEFF) { | 142 if (!json_wide.empty() && start_pos_[0] == 0xFEFF) { |
| 100 ++json_cstr; | 143 ++start_pos_; |
| 101 } | 144 } |
| 102 | 145 |
| 103 JSONReader reader(json_cstr, allow_trailing_comma); | 146 json_pos_ = start_pos_; |
| 147 allow_trailing_comma_ = allow_trailing_comma; |
| 148 stack_depth_ = 0; |
| 149 error_message_.clear(); |
| 104 | 150 |
| 105 Value* temp_root = NULL; | 151 Value* temp_root = NULL; |
| 106 bool success = reader.BuildValue(&temp_root, check_root); | |
| 107 | 152 |
| 108 // Only modify root_ if we have valid JSON and nothing else. | 153 // Only modify root_ if we have valid JSON and nothing else. |
| 109 if (success && reader.ParseToken().type == Token::END_OF_INPUT) { | 154 if (BuildValue(&temp_root, check_root)) { |
| 110 *root = temp_root; | 155 if (ParseToken().type == Token::END_OF_INPUT) { |
| 111 return true; | 156 *root = temp_root; |
| 157 return true; |
| 158 } else { |
| 159 SetErrorMessage(kUnexpectedDataAfterRoot, json_pos_); |
| 160 } |
| 112 } | 161 } |
| 113 | 162 |
| 163 // Default to calling errors "syntax errors". |
| 164 if (error_message_.empty()) |
| 165 SetErrorMessage(kSyntaxError, json_pos_); |
| 166 |
| 114 if (temp_root) | 167 if (temp_root) |
| 115 delete temp_root; | 168 delete temp_root; |
| 116 return false; | 169 return false; |
| 117 } | 170 } |
| 118 | 171 |
| 119 JSONReader::JSONReader(const wchar_t* json_start_pos, | |
| 120 bool allow_trailing_comma) | |
| 121 : json_pos_(json_start_pos), | |
| 122 stack_depth_(0), | |
| 123 allow_trailing_comma_(allow_trailing_comma) {} | |
| 124 | |
| 125 bool JSONReader::BuildValue(Value** node, bool is_root) { | 172 bool JSONReader::BuildValue(Value** node, bool is_root) { |
| 126 ++stack_depth_; | 173 ++stack_depth_; |
| 127 if (stack_depth_ > kStackLimit) | 174 if (stack_depth_ > kStackLimit) { |
| 175 SetErrorMessage(kTooMuchNesting, json_pos_); |
| 128 return false; | 176 return false; |
| 177 } |
| 129 | 178 |
| 130 Token token = ParseToken(); | 179 Token token = ParseToken(); |
| 131 // The root token must be an array or an object. | 180 // The root token must be an array or an object. |
| 132 if (is_root && token.type != Token::OBJECT_BEGIN && | 181 if (is_root && token.type != Token::OBJECT_BEGIN && |
| 133 token.type != Token::ARRAY_BEGIN) { | 182 token.type != Token::ARRAY_BEGIN) { |
| 183 SetErrorMessage(kBadRootElementType, json_pos_); |
| 134 return false; | 184 return false; |
| 135 } | 185 } |
| 136 | 186 |
| 137 switch (token.type) { | 187 switch (token.type) { |
| 138 case Token::END_OF_INPUT: | 188 case Token::END_OF_INPUT: |
| 139 case Token::INVALID_TOKEN: | 189 case Token::INVALID_TOKEN: |
| 140 return false; | 190 return false; |
| 141 | 191 |
| 142 case Token::NULL_TOKEN: | 192 case Token::NULL_TOKEN: |
| 143 *node = Value::CreateNullValue(); | 193 *node = Value::CreateNullValue(); |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 177 | 227 |
| 178 // After a list value, we expect a comma or the end of the list. | 228 // After a list value, we expect a comma or the end of the list. |
| 179 token = ParseToken(); | 229 token = ParseToken(); |
| 180 if (token.type == Token::LIST_SEPARATOR) { | 230 if (token.type == Token::LIST_SEPARATOR) { |
| 181 json_pos_ += token.length; | 231 json_pos_ += token.length; |
| 182 token = ParseToken(); | 232 token = ParseToken(); |
| 183 // Trailing commas are invalid according to the JSON RFC, but some | 233 // Trailing commas are invalid according to the JSON RFC, but some |
| 184 // consumers need the parsing leniency, so handle accordingly. | 234 // consumers need the parsing leniency, so handle accordingly. |
| 185 if (token.type == Token::ARRAY_END) { | 235 if (token.type == Token::ARRAY_END) { |
| 186 if (!allow_trailing_comma_) { | 236 if (!allow_trailing_comma_) { |
| 237 SetErrorMessage(kTrailingComma, json_pos_); |
| 187 delete array; | 238 delete array; |
| 188 return false; | 239 return false; |
| 189 } | 240 } |
| 190 // Trailing comma OK, stop parsing the Array. | 241 // Trailing comma OK, stop parsing the Array. |
| 191 break; | 242 break; |
| 192 } | 243 } |
| 193 } else if (token.type != Token::ARRAY_END) { | 244 } else if (token.type != Token::ARRAY_END) { |
| 194 // Unexpected value after list value. Bail out. | 245 // Unexpected value after list value. Bail out. |
| 195 delete array; | 246 delete array; |
| 196 return false; | 247 return false; |
| 197 } | 248 } |
| 198 } | 249 } |
| 199 if (token.type != Token::ARRAY_END) { | 250 if (token.type != Token::ARRAY_END) { |
| 200 delete array; | 251 delete array; |
| 201 return false; | 252 return false; |
| 202 } | 253 } |
| 203 *node = array; | 254 *node = array; |
| 204 break; | 255 break; |
| 205 } | 256 } |
| 206 | 257 |
| 207 case Token::OBJECT_BEGIN: | 258 case Token::OBJECT_BEGIN: |
| 208 { | 259 { |
| 209 json_pos_ += token.length; | 260 json_pos_ += token.length; |
| 210 token = ParseToken(); | 261 token = ParseToken(); |
| 211 | 262 |
| 212 DictionaryValue* dict = new DictionaryValue; | 263 DictionaryValue* dict = new DictionaryValue; |
| 213 while (token.type != Token::OBJECT_END) { | 264 while (token.type != Token::OBJECT_END) { |
| 214 if (token.type != Token::STRING) { | 265 if (token.type != Token::STRING) { |
| 266 SetErrorMessage(kUnquotedDictionaryKey, json_pos_); |
| 215 delete dict; | 267 delete dict; |
| 216 return false; | 268 return false; |
| 217 } | 269 } |
| 218 Value* dict_key_value = NULL; | 270 Value* dict_key_value = NULL; |
| 219 if (!DecodeString(token, &dict_key_value)) { | 271 if (!DecodeString(token, &dict_key_value)) { |
| 220 delete dict; | 272 delete dict; |
| 221 return false; | 273 return false; |
| 222 } | 274 } |
| 223 // Convert the key into a wstring. | 275 // Convert the key into a wstring. |
| 224 std::wstring dict_key; | 276 std::wstring dict_key; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 245 // After a key/value pair, we expect a comma or the end of the | 297 // After a key/value pair, we expect a comma or the end of the |
| 246 // object. | 298 // object. |
| 247 token = ParseToken(); | 299 token = ParseToken(); |
| 248 if (token.type == Token::LIST_SEPARATOR) { | 300 if (token.type == Token::LIST_SEPARATOR) { |
| 249 json_pos_ += token.length; | 301 json_pos_ += token.length; |
| 250 token = ParseToken(); | 302 token = ParseToken(); |
| 251 // Trailing commas are invalid according to the JSON RFC, but some | 303 // Trailing commas are invalid according to the JSON RFC, but some |
| 252 // consumers need the parsing leniency, so handle accordingly. | 304 // consumers need the parsing leniency, so handle accordingly. |
| 253 if (token.type == Token::OBJECT_END) { | 305 if (token.type == Token::OBJECT_END) { |
| 254 if (!allow_trailing_comma_) { | 306 if (!allow_trailing_comma_) { |
| 307 SetErrorMessage(kTrailingComma, json_pos_); |
| 255 delete dict; | 308 delete dict; |
| 256 return false; | 309 return false; |
| 257 } | 310 } |
| 258 // Trailing comma OK, stop parsing the Object. | 311 // Trailing comma OK, stop parsing the Object. |
| 259 break; | 312 break; |
| 260 } | 313 } |
| 261 } else if (token.type != Token::OBJECT_END) { | 314 } else if (token.type != Token::OBJECT_END) { |
| 262 // Unexpected value after last object value. Bail out. | 315 // Unexpected value after last object value. Bail out. |
| 263 delete dict; | 316 delete dict; |
| 264 return false; | 317 return false; |
| (...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 340 JSONReader::Token JSONReader::ParseStringToken() { | 393 JSONReader::Token JSONReader::ParseStringToken() { |
| 341 Token token(Token::STRING, json_pos_, 1); | 394 Token token(Token::STRING, json_pos_, 1); |
| 342 wchar_t c = token.NextChar(); | 395 wchar_t c = token.NextChar(); |
| 343 while ('\0' != c) { | 396 while ('\0' != c) { |
| 344 if ('\\' == c) { | 397 if ('\\' == c) { |
| 345 ++token.length; | 398 ++token.length; |
| 346 c = token.NextChar(); | 399 c = token.NextChar(); |
| 347 // Make sure the escaped char is valid. | 400 // Make sure the escaped char is valid. |
| 348 switch (c) { | 401 switch (c) { |
| 349 case 'x': | 402 case 'x': |
| 350 if (!ReadHexDigits(token, 2)) | 403 if (!ReadHexDigits(token, 2)) { |
| 404 SetErrorMessage(kInvalidEscape, json_pos_ + token.length); |
| 351 return kInvalidToken; | 405 return kInvalidToken; |
| 406 } |
| 352 break; | 407 break; |
| 353 case 'u': | 408 case 'u': |
| 354 if (!ReadHexDigits(token, 4)) | 409 if (!ReadHexDigits(token, 4)) { |
| 410 SetErrorMessage(kInvalidEscape, json_pos_ + token.length); |
| 355 return kInvalidToken; | 411 return kInvalidToken; |
| 412 } |
| 356 break; | 413 break; |
| 357 case '\\': | 414 case '\\': |
| 358 case '/': | 415 case '/': |
| 359 case 'b': | 416 case 'b': |
| 360 case 'f': | 417 case 'f': |
| 361 case 'n': | 418 case 'n': |
| 362 case 'r': | 419 case 'r': |
| 363 case 't': | 420 case 't': |
| 364 case 'v': | 421 case 'v': |
| 365 case '"': | 422 case '"': |
| 366 break; | 423 break; |
| 367 default: | 424 default: |
| 425 SetErrorMessage(kInvalidEscape, json_pos_ + token.length); |
| 368 return kInvalidToken; | 426 return kInvalidToken; |
| 369 } | 427 } |
| 370 } else if ('"' == c) { | 428 } else if ('"' == c) { |
| 371 ++token.length; | 429 ++token.length; |
| 372 return token; | 430 return token; |
| 373 } | 431 } |
| 374 ++token.length; | 432 ++token.length; |
| 375 c = token.NextChar(); | 433 c = token.NextChar(); |
| 376 } | 434 } |
| 377 return kInvalidToken; | 435 return kInvalidToken; |
| (...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 575 default: | 633 default: |
| 576 ++json_pos_; | 634 ++json_pos_; |
| 577 } | 635 } |
| 578 } | 636 } |
| 579 } else { | 637 } else { |
| 580 return false; | 638 return false; |
| 581 } | 639 } |
| 582 return true; | 640 return true; |
| 583 } | 641 } |
| 584 | 642 |
| 643 void JSONReader::SetErrorMessage(const char* description, |
| 644 const wchar_t* error_pos) { |
| 645 int line_number = 1; |
| 646 int column_number = 1; |
| 647 |
| 648 // Figure out the line and column the error occured at. |
| 649 for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) { |
| 650 if (*pos == '\0') { |
| 651 NOTREACHED(); |
| 652 return; |
| 653 } |
| 654 |
| 655 if (*pos == '\n') { |
| 656 ++line_number; |
| 657 column_number = 1; |
| 658 } else { |
| 659 ++column_number; |
| 660 } |
| 661 } |
| 662 |
| 663 error_message_ = FormatErrorMessage(line_number, column_number, description); |
| 664 } |
| OLD | NEW |