OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/json_reader.h" | 5 #include "base/json_reader.h" |
6 | 6 |
7 #include "base/float_util.h" | 7 #include "base/float_util.h" |
8 #include "base/logging.h" | 8 #include "base/logging.h" |
9 #include "base/string_util.h" | 9 #include "base/string_util.h" |
10 #include "base/values.h" | 10 #include "base/values.h" |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
64 return false; | 64 return false; |
65 } | 65 } |
66 } | 66 } |
67 | 67 |
68 token.length += digits; | 68 token.length += digits; |
69 return true; | 69 return true; |
70 } | 70 } |
71 | 71 |
72 } // anonymous namespace | 72 } // anonymous namespace |
73 | 73 |
| 74 const char* JSONReader::kBadRootElementType = |
| 75 "Root value must be an array or object."; |
| 76 const char* JSONReader::kInvalidEscape = |
| 77 "Invalid escape sequence."; |
| 78 const char* JSONReader::kSyntaxError = |
| 79 "Syntax error."; |
| 80 const char* JSONReader::kTrailingComma = |
| 81 "Trailing comma not allowed."; |
| 82 const char* JSONReader::kTooMuchNesting = |
| 83 "Too much nesting."; |
| 84 const char* JSONReader::kUnexpectedDataAfterRoot = |
| 85 "Unexpected data after root element."; |
| 86 const char* JSONReader::kUnsupportedEncoding = |
| 87 "Unsupported encoding. JSON must be UTF-8."; |
| 88 const char* JSONReader::kUnquotedDictionaryKey = |
| 89 "Dictionary keys must be quoted."; |
| 90 |
74 /* static */ | 91 /* static */ |
75 bool JSONReader::Read(const std::string& json, | 92 bool JSONReader::Read(const std::string& json, |
76 Value** root, | 93 Value** root, |
77 bool allow_trailing_comma) { | 94 bool allow_trailing_comma) { |
78 return JsonToValue(json, root, true, allow_trailing_comma); | 95 return ReadAndReturnError(json, root, allow_trailing_comma, NULL); |
79 } | 96 } |
80 | 97 |
81 /* static */ | 98 /* static */ |
82 bool JSONReader::JsonToValue(const std::string& json, | 99 bool JSONReader::ReadAndReturnError(const std::string& json, |
83 Value** root, | 100 Value** root, |
84 bool check_root, | 101 bool allow_trailing_comma, |
85 bool allow_trailing_comma) { | 102 std::string *error_message_out) { |
| 103 JSONReader reader = JSONReader(); |
| 104 if (reader.JsonToValue(json, root, true, allow_trailing_comma)) { |
| 105 return true; |
| 106 } else { |
| 107 if (error_message_out) |
| 108 *error_message_out = *reader.error_message(); |
| 109 return false; |
| 110 } |
| 111 } |
| 112 |
| 113 /* static */ |
| 114 std::string JSONReader::FormatErrorMessage(int line, int column, |
| 115 const char* description) { |
| 116 return StringPrintf("Line: %i, column: %i, %s", |
| 117 line, column, description); |
| 118 } |
| 119 |
| 120 JSONReader::JSONReader() |
| 121 : start_pos_(NULL), json_pos_(NULL), stack_depth_(0), |
| 122 allow_trailing_comma_(false) {} |
| 123 |
| 124 bool JSONReader::JsonToValue(const std::string& json, Value** root, |
| 125 bool check_root, bool allow_trailing_comma) { |
86 // The input must be in UTF-8. | 126 // The input must be in UTF-8. |
87 if (!IsStringUTF8(json.c_str())) | 127 if (!IsStringUTF8(json.c_str())) { |
| 128 error_message_ = kUnsupportedEncoding; |
88 return false; | 129 return false; |
| 130 } |
| 131 |
89 // The conversion from UTF8 to wstring removes null bytes for us | 132 // The conversion from UTF8 to wstring removes null bytes for us |
90 // (a good thing). | 133 // (a good thing). |
91 std::wstring json_wide(UTF8ToWide(json)); | 134 std::wstring json_wide(UTF8ToWide(json)); |
92 const wchar_t* json_cstr = json_wide.c_str(); | 135 start_pos_ = json_wide.c_str(); |
93 | 136 |
94 // When the input JSON string starts with a UTF-8 Byte-Order-Mark | 137 // When the input JSON string starts with a UTF-8 Byte-Order-Mark |
95 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode | 138 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode |
96 // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from | 139 // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from |
97 // mis-treating a Unicode BOM as an invalid character and returning false, | 140 // mis-treating a Unicode BOM as an invalid character and returning false, |
98 // skip a converted Unicode BOM if it exists. | 141 // skip a converted Unicode BOM if it exists. |
99 if (!json_wide.empty() && json_cstr[0] == 0xFEFF) { | 142 if (!json_wide.empty() && start_pos_[0] == 0xFEFF) { |
100 ++json_cstr; | 143 ++start_pos_; |
101 } | 144 } |
102 | 145 |
103 JSONReader reader(json_cstr, allow_trailing_comma); | 146 json_pos_ = start_pos_; |
| 147 allow_trailing_comma_ = allow_trailing_comma; |
| 148 stack_depth_ = 0; |
| 149 error_message_.clear(); |
104 | 150 |
105 Value* temp_root = NULL; | 151 Value* temp_root = NULL; |
106 bool success = reader.BuildValue(&temp_root, check_root); | |
107 | 152 |
108 // Only modify root_ if we have valid JSON and nothing else. | 153 // Only modify root_ if we have valid JSON and nothing else. |
109 if (success && reader.ParseToken().type == Token::END_OF_INPUT) { | 154 if (BuildValue(&temp_root, check_root)) { |
110 *root = temp_root; | 155 if (ParseToken().type == Token::END_OF_INPUT) { |
111 return true; | 156 *root = temp_root; |
| 157 return true; |
| 158 } else { |
| 159 SetErrorMessage(kUnexpectedDataAfterRoot, json_pos_); |
| 160 } |
112 } | 161 } |
113 | 162 |
| 163 // Default to calling errors "syntax errors". |
| 164 if (error_message_.empty()) |
| 165 SetErrorMessage(kSyntaxError, json_pos_); |
| 166 |
114 if (temp_root) | 167 if (temp_root) |
115 delete temp_root; | 168 delete temp_root; |
116 return false; | 169 return false; |
117 } | 170 } |
118 | 171 |
119 JSONReader::JSONReader(const wchar_t* json_start_pos, | |
120 bool allow_trailing_comma) | |
121 : json_pos_(json_start_pos), | |
122 stack_depth_(0), | |
123 allow_trailing_comma_(allow_trailing_comma) {} | |
124 | |
125 bool JSONReader::BuildValue(Value** node, bool is_root) { | 172 bool JSONReader::BuildValue(Value** node, bool is_root) { |
126 ++stack_depth_; | 173 ++stack_depth_; |
127 if (stack_depth_ > kStackLimit) | 174 if (stack_depth_ > kStackLimit) { |
| 175 SetErrorMessage(kTooMuchNesting, json_pos_); |
128 return false; | 176 return false; |
| 177 } |
129 | 178 |
130 Token token = ParseToken(); | 179 Token token = ParseToken(); |
131 // The root token must be an array or an object. | 180 // The root token must be an array or an object. |
132 if (is_root && token.type != Token::OBJECT_BEGIN && | 181 if (is_root && token.type != Token::OBJECT_BEGIN && |
133 token.type != Token::ARRAY_BEGIN) { | 182 token.type != Token::ARRAY_BEGIN) { |
| 183 SetErrorMessage(kBadRootElementType, json_pos_); |
134 return false; | 184 return false; |
135 } | 185 } |
136 | 186 |
137 switch (token.type) { | 187 switch (token.type) { |
138 case Token::END_OF_INPUT: | 188 case Token::END_OF_INPUT: |
139 case Token::INVALID_TOKEN: | 189 case Token::INVALID_TOKEN: |
140 return false; | 190 return false; |
141 | 191 |
142 case Token::NULL_TOKEN: | 192 case Token::NULL_TOKEN: |
143 *node = Value::CreateNullValue(); | 193 *node = Value::CreateNullValue(); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
177 | 227 |
178 // After a list value, we expect a comma or the end of the list. | 228 // After a list value, we expect a comma or the end of the list. |
179 token = ParseToken(); | 229 token = ParseToken(); |
180 if (token.type == Token::LIST_SEPARATOR) { | 230 if (token.type == Token::LIST_SEPARATOR) { |
181 json_pos_ += token.length; | 231 json_pos_ += token.length; |
182 token = ParseToken(); | 232 token = ParseToken(); |
183 // Trailing commas are invalid according to the JSON RFC, but some | 233 // Trailing commas are invalid according to the JSON RFC, but some |
184 // consumers need the parsing leniency, so handle accordingly. | 234 // consumers need the parsing leniency, so handle accordingly. |
185 if (token.type == Token::ARRAY_END) { | 235 if (token.type == Token::ARRAY_END) { |
186 if (!allow_trailing_comma_) { | 236 if (!allow_trailing_comma_) { |
| 237 SetErrorMessage(kTrailingComma, json_pos_); |
187 delete array; | 238 delete array; |
188 return false; | 239 return false; |
189 } | 240 } |
190 // Trailing comma OK, stop parsing the Array. | 241 // Trailing comma OK, stop parsing the Array. |
191 break; | 242 break; |
192 } | 243 } |
193 } else if (token.type != Token::ARRAY_END) { | 244 } else if (token.type != Token::ARRAY_END) { |
194 // Unexpected value after list value. Bail out. | 245 // Unexpected value after list value. Bail out. |
195 delete array; | 246 delete array; |
196 return false; | 247 return false; |
197 } | 248 } |
198 } | 249 } |
199 if (token.type != Token::ARRAY_END) { | 250 if (token.type != Token::ARRAY_END) { |
200 delete array; | 251 delete array; |
201 return false; | 252 return false; |
202 } | 253 } |
203 *node = array; | 254 *node = array; |
204 break; | 255 break; |
205 } | 256 } |
206 | 257 |
207 case Token::OBJECT_BEGIN: | 258 case Token::OBJECT_BEGIN: |
208 { | 259 { |
209 json_pos_ += token.length; | 260 json_pos_ += token.length; |
210 token = ParseToken(); | 261 token = ParseToken(); |
211 | 262 |
212 DictionaryValue* dict = new DictionaryValue; | 263 DictionaryValue* dict = new DictionaryValue; |
213 while (token.type != Token::OBJECT_END) { | 264 while (token.type != Token::OBJECT_END) { |
214 if (token.type != Token::STRING) { | 265 if (token.type != Token::STRING) { |
| 266 SetErrorMessage(kUnquotedDictionaryKey, json_pos_); |
215 delete dict; | 267 delete dict; |
216 return false; | 268 return false; |
217 } | 269 } |
218 Value* dict_key_value = NULL; | 270 Value* dict_key_value = NULL; |
219 if (!DecodeString(token, &dict_key_value)) { | 271 if (!DecodeString(token, &dict_key_value)) { |
220 delete dict; | 272 delete dict; |
221 return false; | 273 return false; |
222 } | 274 } |
223 // Convert the key into a wstring. | 275 // Convert the key into a wstring. |
224 std::wstring dict_key; | 276 std::wstring dict_key; |
(...skipping 20 matching lines...) Expand all Loading... |
245 // After a key/value pair, we expect a comma or the end of the | 297 // After a key/value pair, we expect a comma or the end of the |
246 // object. | 298 // object. |
247 token = ParseToken(); | 299 token = ParseToken(); |
248 if (token.type == Token::LIST_SEPARATOR) { | 300 if (token.type == Token::LIST_SEPARATOR) { |
249 json_pos_ += token.length; | 301 json_pos_ += token.length; |
250 token = ParseToken(); | 302 token = ParseToken(); |
251 // Trailing commas are invalid according to the JSON RFC, but some | 303 // Trailing commas are invalid according to the JSON RFC, but some |
252 // consumers need the parsing leniency, so handle accordingly. | 304 // consumers need the parsing leniency, so handle accordingly. |
253 if (token.type == Token::OBJECT_END) { | 305 if (token.type == Token::OBJECT_END) { |
254 if (!allow_trailing_comma_) { | 306 if (!allow_trailing_comma_) { |
| 307 SetErrorMessage(kTrailingComma, json_pos_); |
255 delete dict; | 308 delete dict; |
256 return false; | 309 return false; |
257 } | 310 } |
258 // Trailing comma OK, stop parsing the Object. | 311 // Trailing comma OK, stop parsing the Object. |
259 break; | 312 break; |
260 } | 313 } |
261 } else if (token.type != Token::OBJECT_END) { | 314 } else if (token.type != Token::OBJECT_END) { |
262 // Unexpected value after last object value. Bail out. | 315 // Unexpected value after last object value. Bail out. |
263 delete dict; | 316 delete dict; |
264 return false; | 317 return false; |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
340 JSONReader::Token JSONReader::ParseStringToken() { | 393 JSONReader::Token JSONReader::ParseStringToken() { |
341 Token token(Token::STRING, json_pos_, 1); | 394 Token token(Token::STRING, json_pos_, 1); |
342 wchar_t c = token.NextChar(); | 395 wchar_t c = token.NextChar(); |
343 while ('\0' != c) { | 396 while ('\0' != c) { |
344 if ('\\' == c) { | 397 if ('\\' == c) { |
345 ++token.length; | 398 ++token.length; |
346 c = token.NextChar(); | 399 c = token.NextChar(); |
347 // Make sure the escaped char is valid. | 400 // Make sure the escaped char is valid. |
348 switch (c) { | 401 switch (c) { |
349 case 'x': | 402 case 'x': |
350 if (!ReadHexDigits(token, 2)) | 403 if (!ReadHexDigits(token, 2)) { |
| 404 SetErrorMessage(kInvalidEscape, json_pos_ + token.length); |
351 return kInvalidToken; | 405 return kInvalidToken; |
| 406 } |
352 break; | 407 break; |
353 case 'u': | 408 case 'u': |
354 if (!ReadHexDigits(token, 4)) | 409 if (!ReadHexDigits(token, 4)) { |
| 410 SetErrorMessage(kInvalidEscape, json_pos_ + token.length); |
355 return kInvalidToken; | 411 return kInvalidToken; |
| 412 } |
356 break; | 413 break; |
357 case '\\': | 414 case '\\': |
358 case '/': | 415 case '/': |
359 case 'b': | 416 case 'b': |
360 case 'f': | 417 case 'f': |
361 case 'n': | 418 case 'n': |
362 case 'r': | 419 case 'r': |
363 case 't': | 420 case 't': |
364 case 'v': | 421 case 'v': |
365 case '"': | 422 case '"': |
366 break; | 423 break; |
367 default: | 424 default: |
| 425 SetErrorMessage(kInvalidEscape, json_pos_ + token.length); |
368 return kInvalidToken; | 426 return kInvalidToken; |
369 } | 427 } |
370 } else if ('"' == c) { | 428 } else if ('"' == c) { |
371 ++token.length; | 429 ++token.length; |
372 return token; | 430 return token; |
373 } | 431 } |
374 ++token.length; | 432 ++token.length; |
375 c = token.NextChar(); | 433 c = token.NextChar(); |
376 } | 434 } |
377 return kInvalidToken; | 435 return kInvalidToken; |
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
575 default: | 633 default: |
576 ++json_pos_; | 634 ++json_pos_; |
577 } | 635 } |
578 } | 636 } |
579 } else { | 637 } else { |
580 return false; | 638 return false; |
581 } | 639 } |
582 return true; | 640 return true; |
583 } | 641 } |
584 | 642 |
| 643 void JSONReader::SetErrorMessage(const char* description, |
| 644 const wchar_t* error_pos) { |
| 645 int line_number = 1; |
| 646 int column_number = 1; |
| 647 |
| 648 // Figure out the line and column the error occured at. |
| 649 for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) { |
| 650 if (*pos == '\0') { |
| 651 NOTREACHED(); |
| 652 return; |
| 653 } |
| 654 |
| 655 if (*pos == '\n') { |
| 656 ++line_number; |
| 657 column_number = 1; |
| 658 } else { |
| 659 ++column_number; |
| 660 } |
| 661 } |
| 662 |
| 663 error_message_ = FormatErrorMessage(line_number, column_number, description); |
| 664 } |
OLD | NEW |