| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/json/json_reader.h" | 5 #include "base/json/json_reader.h" |
| 6 | 6 |
| 7 #include "base/float_util.h" | 7 #include "base/json/json_parser.h" |
| 8 #include "base/logging.h" | 8 #include "base/logging.h" |
| 9 #include "base/memory/scoped_ptr.h" | |
| 10 #include "base/stringprintf.h" | |
| 11 #include "base/string_number_conversions.h" | |
| 12 #include "base/string_piece.h" | |
| 13 #include "base/string_util.h" | |
| 14 #include "base/third_party/icu/icu_utf.h" | |
| 15 #include "base/utf_string_conversions.h" | |
| 16 #include "base/values.h" | |
| 17 | |
| 18 namespace { | |
| 19 | |
| 20 const char kNullString[] = "null"; | |
| 21 const char kTrueString[] = "true"; | |
| 22 const char kFalseString[] = "false"; | |
| 23 | |
| 24 const int kStackLimit = 100; | |
| 25 | |
| 26 // A helper method for ParseNumberToken. It reads an int from the end of | |
| 27 // token. The method returns false if there is no valid integer at the end of | |
| 28 // the token. | |
| 29 bool ReadInt(base::JSONReader::Token& token, bool can_have_leading_zeros) { | |
| 30 char first = token.NextChar(); | |
| 31 int len = 0; | |
| 32 | |
| 33 // Read in more digits. | |
| 34 char c = first; | |
| 35 while ('\0' != c && IsAsciiDigit(c)) { | |
| 36 ++token.length; | |
| 37 ++len; | |
| 38 c = token.NextChar(); | |
| 39 } | |
| 40 // We need at least 1 digit. | |
| 41 if (len == 0) | |
| 42 return false; | |
| 43 | |
| 44 if (!can_have_leading_zeros && len > 1 && '0' == first) | |
| 45 return false; | |
| 46 | |
| 47 return true; | |
| 48 } | |
| 49 | |
| 50 // A helper method for ParseStringToken. It reads |digits| hex digits from the | |
| 51 // token. If the sequence of digits is not valid (contains other characters), | |
| 52 // the method returns false. | |
| 53 bool ReadHexDigits(base::JSONReader::Token& token, int digits) { | |
| 54 for (int i = 1; i <= digits; ++i) { | |
| 55 char c = *(token.begin + token.length + i); | |
| 56 if (c == '\0' || !IsHexDigit(c)) | |
| 57 return false; | |
| 58 } | |
| 59 | |
| 60 token.length += digits; | |
| 61 return true; | |
| 62 } | |
| 63 | |
| 64 } // namespace | |
| 65 | 9 |
| 66 namespace base { | 10 namespace base { |
| 67 | 11 |
| 68 const char* JSONReader::kBadRootElementType = | |
| 69 "Root value must be an array or object."; | |
| 70 const char* JSONReader::kInvalidEscape = | 12 const char* JSONReader::kInvalidEscape = |
| 71 "Invalid escape sequence."; | 13 "Invalid escape sequence."; |
| 72 const char* JSONReader::kSyntaxError = | 14 const char* JSONReader::kSyntaxError = |
| 73 "Syntax error."; | 15 "Syntax error."; |
| 16 const char* JSONReader::kUnexpectedToken = |
| 17 "Unexpected token."; |
| 74 const char* JSONReader::kTrailingComma = | 18 const char* JSONReader::kTrailingComma = |
| 75 "Trailing comma not allowed."; | 19 "Trailing comma not allowed."; |
| 76 const char* JSONReader::kTooMuchNesting = | 20 const char* JSONReader::kTooMuchNesting = |
| 77 "Too much nesting."; | 21 "Too much nesting."; |
| 78 const char* JSONReader::kUnexpectedDataAfterRoot = | 22 const char* JSONReader::kUnexpectedDataAfterRoot = |
| 79 "Unexpected data after root element."; | 23 "Unexpected data after root element."; |
| 80 const char* JSONReader::kUnsupportedEncoding = | 24 const char* JSONReader::kUnsupportedEncoding = |
| 81 "Unsupported encoding. JSON must be UTF-8."; | 25 "Unsupported encoding. JSON must be UTF-8."; |
| 82 const char* JSONReader::kUnquotedDictionaryKey = | 26 const char* JSONReader::kUnquotedDictionaryKey = |
| 83 "Dictionary keys must be quoted."; | 27 "Dictionary keys must be quoted."; |
| 84 | 28 |
| 85 JSONReader::JSONReader() | 29 JSONReader::JSONReader() |
| 86 : start_pos_(NULL), | 30 : parser_(new internal::JSONParser(JSON_PARSE_RFC)) { |
| 87 json_pos_(NULL), | 31 } |
| 88 end_pos_(NULL), | 32 |
| 89 stack_depth_(0), | 33 JSONReader::JSONReader(int options) |
| 90 allow_trailing_comma_(false), | 34 : parser_(new internal::JSONParser(options)) { |
| 91 error_code_(JSON_NO_ERROR), | 35 } |
| 92 error_line_(0), | 36 |
| 93 error_col_(0) {} | 37 JSONReader::~JSONReader() { |
| 38 } |
| 94 | 39 |
| 95 // static | 40 // static |
| 96 Value* JSONReader::Read(const std::string& json) { | 41 Value* JSONReader::Read(const std::string& json) { |
| 97 return Read(json, JSON_PARSE_RFC); | 42 internal::JSONParser parser(JSON_PARSE_RFC); |
| 43 return parser.Parse(json); |
| 98 } | 44 } |
| 99 | 45 |
| 100 // static | 46 // static |
| 101 Value* JSONReader::Read(const std::string& json, | 47 Value* JSONReader::Read(const std::string& json, |
| 102 int options) { | 48 int options) { |
| 103 return ReadAndReturnError(json, options, NULL, NULL); | 49 internal::JSONParser parser(options); |
| 50 return parser.Parse(json); |
| 104 } | 51 } |
| 105 | 52 |
| 106 // static | 53 // static |
| 107 Value* JSONReader::ReadAndReturnError(const std::string& json, | 54 Value* JSONReader::ReadAndReturnError(const std::string& json, |
| 108 int options, | 55 int options, |
| 109 int* error_code_out, | 56 int* error_code_out, |
| 110 std::string* error_msg_out) { | 57 std::string* error_msg_out) { |
| 111 JSONReader reader = JSONReader(); | 58 internal::JSONParser parser(options); |
| 112 Value* root = reader.JsonToValue(json, false, | 59 Value* root = parser.Parse(json); |
| 113 (options & JSON_ALLOW_TRAILING_COMMAS) != 0); | |
| 114 if (root) | 60 if (root) |
| 115 return root; | 61 return root; |
| 116 | 62 |
| 117 if (error_code_out) | 63 if (error_code_out) |
| 118 *error_code_out = reader.error_code(); | 64 *error_code_out = parser.error_code(); |
| 119 if (error_msg_out) | 65 if (error_msg_out) |
| 120 *error_msg_out = reader.GetErrorMessage(); | 66 *error_msg_out = parser.GetErrorMessage(); |
| 121 | 67 |
| 122 return NULL; | 68 return NULL; |
| 123 } | 69 } |
| 124 | 70 |
| 125 // static | 71 // static |
| 126 std::string JSONReader::ErrorCodeToString(JsonParseError error_code) { | 72 std::string JSONReader::ErrorCodeToString(JsonParseError error_code) { |
| 127 switch (error_code) { | 73 switch (error_code) { |
| 128 case JSON_NO_ERROR: | 74 case JSON_NO_ERROR: |
| 129 return std::string(); | 75 return std::string(); |
| 130 case JSON_BAD_ROOT_ELEMENT_TYPE: | |
| 131 return kBadRootElementType; | |
| 132 case JSON_INVALID_ESCAPE: | 76 case JSON_INVALID_ESCAPE: |
| 133 return kInvalidEscape; | 77 return kInvalidEscape; |
| 134 case JSON_SYNTAX_ERROR: | 78 case JSON_SYNTAX_ERROR: |
| 135 return kSyntaxError; | 79 return kSyntaxError; |
| 80 case JSON_UNEXPECTED_TOKEN: |
| 81 return kUnexpectedToken; |
| 136 case JSON_TRAILING_COMMA: | 82 case JSON_TRAILING_COMMA: |
| 137 return kTrailingComma; | 83 return kTrailingComma; |
| 138 case JSON_TOO_MUCH_NESTING: | 84 case JSON_TOO_MUCH_NESTING: |
| 139 return kTooMuchNesting; | 85 return kTooMuchNesting; |
| 140 case JSON_UNEXPECTED_DATA_AFTER_ROOT: | 86 case JSON_UNEXPECTED_DATA_AFTER_ROOT: |
| 141 return kUnexpectedDataAfterRoot; | 87 return kUnexpectedDataAfterRoot; |
| 142 case JSON_UNSUPPORTED_ENCODING: | 88 case JSON_UNSUPPORTED_ENCODING: |
| 143 return kUnsupportedEncoding; | 89 return kUnsupportedEncoding; |
| 144 case JSON_UNQUOTED_DICTIONARY_KEY: | 90 case JSON_UNQUOTED_DICTIONARY_KEY: |
| 145 return kUnquotedDictionaryKey; | 91 return kUnquotedDictionaryKey; |
| 146 default: | 92 default: |
| 147 NOTREACHED(); | 93 NOTREACHED(); |
| 148 return std::string(); | 94 return std::string(); |
| 149 } | 95 } |
| 150 } | 96 } |
| 151 | 97 |
| 152 std::string JSONReader::GetErrorMessage() const { | 98 Value* JSONReader::ReadToValue(const std::string& json) { |
| 153 return FormatErrorMessage(error_line_, error_col_, | 99 return parser_->Parse(json); |
| 154 ErrorCodeToString(error_code_)); | |
| 155 } | 100 } |
| 156 | 101 |
| 157 Value* JSONReader::JsonToValue(const std::string& json, bool check_root, | 102 JSONReader::JsonParseError JSONReader::error_code() const { |
| 158 bool allow_trailing_comma) { | 103 return parser_->error_code(); |
| 159 // The input must be in UTF-8. | |
| 160 if (!IsStringUTF8(json.data())) { | |
| 161 error_code_ = JSON_UNSUPPORTED_ENCODING; | |
| 162 return NULL; | |
| 163 } | |
| 164 | |
| 165 start_pos_ = json.data(); | |
| 166 end_pos_ = start_pos_ + json.size(); | |
| 167 | |
| 168 // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF) | |
| 169 // or <0xEF 0xBB 0xBF>, advance the start position to keep the | |
| 170 // JSONReader::BuildValue() function from mis-treating a Unicode BOM as an | |
| 171 // invalid character and returning NULL. | |
| 172 if (json.size() >= 3 && static_cast<uint8>(start_pos_[0]) == 0xEF && | |
| 173 static_cast<uint8>(start_pos_[1]) == 0xBB && | |
| 174 static_cast<uint8>(start_pos_[2]) == 0xBF) { | |
| 175 start_pos_ += 3; | |
| 176 } | |
| 177 | |
| 178 json_pos_ = start_pos_; | |
| 179 allow_trailing_comma_ = allow_trailing_comma; | |
| 180 stack_depth_ = 0; | |
| 181 error_code_ = JSON_NO_ERROR; | |
| 182 | |
| 183 scoped_ptr<Value> root(BuildValue(check_root)); | |
| 184 if (root.get()) { | |
| 185 if (ParseToken().type == Token::END_OF_INPUT) { | |
| 186 return root.release(); | |
| 187 } else { | |
| 188 SetErrorCode(JSON_UNEXPECTED_DATA_AFTER_ROOT, json_pos_); | |
| 189 } | |
| 190 } | |
| 191 | |
| 192 // Default to calling errors "syntax errors". | |
| 193 if (error_code_ == 0) | |
| 194 SetErrorCode(JSON_SYNTAX_ERROR, json_pos_); | |
| 195 | |
| 196 return NULL; | |
| 197 } | 104 } |
| 198 | 105 |
| 199 // static | 106 std::string JSONReader::GetErrorMessage() const { |
| 200 std::string JSONReader::FormatErrorMessage(int line, int column, | 107 return parser_->GetErrorMessage(); |
| 201 const std::string& description) { | |
| 202 if (line || column) { | |
| 203 return base::StringPrintf( | |
| 204 "Line: %i, column: %i, %s", line, column, description.c_str()); | |
| 205 } | |
| 206 return description; | |
| 207 } | |
| 208 | |
| 209 Value* JSONReader::BuildValue(bool is_root) { | |
| 210 ++stack_depth_; | |
| 211 if (stack_depth_ > kStackLimit) { | |
| 212 SetErrorCode(JSON_TOO_MUCH_NESTING, json_pos_); | |
| 213 return NULL; | |
| 214 } | |
| 215 | |
| 216 Token token = ParseToken(); | |
| 217 // The root token must be an array or an object. | |
| 218 if (is_root && token.type != Token::OBJECT_BEGIN && | |
| 219 token.type != Token::ARRAY_BEGIN) { | |
| 220 SetErrorCode(JSON_BAD_ROOT_ELEMENT_TYPE, json_pos_); | |
| 221 return NULL; | |
| 222 } | |
| 223 | |
| 224 scoped_ptr<Value> node; | |
| 225 | |
| 226 switch (token.type) { | |
| 227 case Token::END_OF_INPUT: | |
| 228 case Token::INVALID_TOKEN: | |
| 229 return NULL; | |
| 230 | |
| 231 case Token::NULL_TOKEN: | |
| 232 node.reset(Value::CreateNullValue()); | |
| 233 break; | |
| 234 | |
| 235 case Token::BOOL_TRUE: | |
| 236 node.reset(Value::CreateBooleanValue(true)); | |
| 237 break; | |
| 238 | |
| 239 case Token::BOOL_FALSE: | |
| 240 node.reset(Value::CreateBooleanValue(false)); | |
| 241 break; | |
| 242 | |
| 243 case Token::NUMBER: | |
| 244 node.reset(DecodeNumber(token)); | |
| 245 if (!node.get()) | |
| 246 return NULL; | |
| 247 break; | |
| 248 | |
| 249 case Token::STRING: | |
| 250 node.reset(DecodeString(token)); | |
| 251 if (!node.get()) | |
| 252 return NULL; | |
| 253 break; | |
| 254 | |
| 255 case Token::ARRAY_BEGIN: | |
| 256 { | |
| 257 json_pos_ += token.length; | |
| 258 token = ParseToken(); | |
| 259 | |
| 260 node.reset(new ListValue()); | |
| 261 while (token.type != Token::ARRAY_END) { | |
| 262 Value* array_node = BuildValue(false); | |
| 263 if (!array_node) | |
| 264 return NULL; | |
| 265 static_cast<ListValue*>(node.get())->Append(array_node); | |
| 266 | |
| 267 // After a list value, we expect a comma or the end of the list. | |
| 268 token = ParseToken(); | |
| 269 if (token.type == Token::LIST_SEPARATOR) { | |
| 270 json_pos_ += token.length; | |
| 271 token = ParseToken(); | |
| 272 // Trailing commas are invalid according to the JSON RFC, but some | |
| 273 // consumers need the parsing leniency, so handle accordingly. | |
| 274 if (token.type == Token::ARRAY_END) { | |
| 275 if (!allow_trailing_comma_) { | |
| 276 SetErrorCode(JSON_TRAILING_COMMA, json_pos_); | |
| 277 return NULL; | |
| 278 } | |
| 279 // Trailing comma OK, stop parsing the Array. | |
| 280 break; | |
| 281 } | |
| 282 } else if (token.type != Token::ARRAY_END) { | |
| 283 // Unexpected value after list value. Bail out. | |
| 284 return NULL; | |
| 285 } | |
| 286 } | |
| 287 if (token.type != Token::ARRAY_END) { | |
| 288 return NULL; | |
| 289 } | |
| 290 break; | |
| 291 } | |
| 292 | |
| 293 case Token::OBJECT_BEGIN: | |
| 294 { | |
| 295 json_pos_ += token.length; | |
| 296 token = ParseToken(); | |
| 297 | |
| 298 node.reset(new DictionaryValue); | |
| 299 while (token.type != Token::OBJECT_END) { | |
| 300 if (token.type != Token::STRING) { | |
| 301 SetErrorCode(JSON_UNQUOTED_DICTIONARY_KEY, json_pos_); | |
| 302 return NULL; | |
| 303 } | |
| 304 scoped_ptr<Value> dict_key_value(DecodeString(token)); | |
| 305 if (!dict_key_value.get()) | |
| 306 return NULL; | |
| 307 | |
| 308 // Convert the key into a std::string. | |
| 309 std::string dict_key; | |
| 310 bool success = dict_key_value->GetAsString(&dict_key); | |
| 311 DCHECK(success); | |
| 312 | |
| 313 json_pos_ += token.length; | |
| 314 token = ParseToken(); | |
| 315 if (token.type != Token::OBJECT_PAIR_SEPARATOR) | |
| 316 return NULL; | |
| 317 | |
| 318 json_pos_ += token.length; | |
| 319 token = ParseToken(); | |
| 320 Value* dict_value = BuildValue(false); | |
| 321 if (!dict_value) | |
| 322 return NULL; | |
| 323 static_cast<DictionaryValue*>(node.get())->SetWithoutPathExpansion( | |
| 324 dict_key, dict_value); | |
| 325 | |
| 326 // After a key/value pair, we expect a comma or the end of the | |
| 327 // object. | |
| 328 token = ParseToken(); | |
| 329 if (token.type == Token::LIST_SEPARATOR) { | |
| 330 json_pos_ += token.length; | |
| 331 token = ParseToken(); | |
| 332 // Trailing commas are invalid according to the JSON RFC, but some | |
| 333 // consumers need the parsing leniency, so handle accordingly. | |
| 334 if (token.type == Token::OBJECT_END) { | |
| 335 if (!allow_trailing_comma_) { | |
| 336 SetErrorCode(JSON_TRAILING_COMMA, json_pos_); | |
| 337 return NULL; | |
| 338 } | |
| 339 // Trailing comma OK, stop parsing the Object. | |
| 340 break; | |
| 341 } | |
| 342 } else if (token.type != Token::OBJECT_END) { | |
| 343 // Unexpected value after last object value. Bail out. | |
| 344 return NULL; | |
| 345 } | |
| 346 } | |
| 347 if (token.type != Token::OBJECT_END) | |
| 348 return NULL; | |
| 349 | |
| 350 break; | |
| 351 } | |
| 352 | |
| 353 default: | |
| 354 // We got a token that's not a value. | |
| 355 return NULL; | |
| 356 } | |
| 357 json_pos_ += token.length; | |
| 358 | |
| 359 --stack_depth_; | |
| 360 return node.release(); | |
| 361 } | |
| 362 | |
| 363 JSONReader::Token JSONReader::ParseNumberToken() { | |
| 364 // We just grab the number here. We validate the size in DecodeNumber. | |
| 365 // According to RFC4627, a valid number is: [minus] int [frac] [exp] | |
| 366 Token token(Token::NUMBER, json_pos_, 0); | |
| 367 char c = *json_pos_; | |
| 368 if ('-' == c) { | |
| 369 ++token.length; | |
| 370 c = token.NextChar(); | |
| 371 } | |
| 372 | |
| 373 if (!ReadInt(token, false)) | |
| 374 return Token::CreateInvalidToken(); | |
| 375 | |
| 376 // Optional fraction part | |
| 377 c = token.NextChar(); | |
| 378 if ('.' == c) { | |
| 379 ++token.length; | |
| 380 if (!ReadInt(token, true)) | |
| 381 return Token::CreateInvalidToken(); | |
| 382 c = token.NextChar(); | |
| 383 } | |
| 384 | |
| 385 // Optional exponent part | |
| 386 if ('e' == c || 'E' == c) { | |
| 387 ++token.length; | |
| 388 c = token.NextChar(); | |
| 389 if ('-' == c || '+' == c) { | |
| 390 ++token.length; | |
| 391 c = token.NextChar(); | |
| 392 } | |
| 393 if (!ReadInt(token, true)) | |
| 394 return Token::CreateInvalidToken(); | |
| 395 } | |
| 396 | |
| 397 return token; | |
| 398 } | |
| 399 | |
| 400 Value* JSONReader::DecodeNumber(const Token& token) { | |
| 401 const std::string num_string(token.begin, token.length); | |
| 402 | |
| 403 int num_int; | |
| 404 if (StringToInt(num_string, &num_int)) | |
| 405 return Value::CreateIntegerValue(num_int); | |
| 406 | |
| 407 double num_double; | |
| 408 if (StringToDouble(num_string, &num_double) && base::IsFinite(num_double)) | |
| 409 return Value::CreateDoubleValue(num_double); | |
| 410 | |
| 411 return NULL; | |
| 412 } | |
| 413 | |
| 414 JSONReader::Token JSONReader::ParseStringToken() { | |
| 415 Token token(Token::STRING, json_pos_, 1); | |
| 416 char c = token.NextChar(); | |
| 417 while (json_pos_ + token.length < end_pos_) { | |
| 418 if ('\\' == c) { | |
| 419 ++token.length; | |
| 420 c = token.NextChar(); | |
| 421 // Make sure the escaped char is valid. | |
| 422 switch (c) { | |
| 423 case 'x': | |
| 424 if (!ReadHexDigits(token, 2)) { | |
| 425 SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length); | |
| 426 return Token::CreateInvalidToken(); | |
| 427 } | |
| 428 break; | |
| 429 case 'u': | |
| 430 if (!ReadHexDigits(token, 4)) { | |
| 431 SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length); | |
| 432 return Token::CreateInvalidToken(); | |
| 433 } | |
| 434 break; | |
| 435 case '\\': | |
| 436 case '/': | |
| 437 case 'b': | |
| 438 case 'f': | |
| 439 case 'n': | |
| 440 case 'r': | |
| 441 case 't': | |
| 442 case 'v': | |
| 443 case '"': | |
| 444 break; | |
| 445 default: | |
| 446 SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length); | |
| 447 return Token::CreateInvalidToken(); | |
| 448 } | |
| 449 } else if ('"' == c) { | |
| 450 ++token.length; | |
| 451 return token; | |
| 452 } | |
| 453 ++token.length; | |
| 454 c = token.NextChar(); | |
| 455 } | |
| 456 return Token::CreateInvalidToken(); | |
| 457 } | |
| 458 | |
| 459 Value* JSONReader::DecodeString(const Token& token) { | |
| 460 std::string decoded_str; | |
| 461 decoded_str.reserve(token.length - 2); | |
| 462 | |
| 463 for (int i = 1; i < token.length - 1; ++i) { | |
| 464 char c = *(token.begin + i); | |
| 465 if ('\\' == c) { | |
| 466 ++i; | |
| 467 c = *(token.begin + i); | |
| 468 switch (c) { | |
| 469 case '"': | |
| 470 case '/': | |
| 471 case '\\': | |
| 472 decoded_str.push_back(c); | |
| 473 break; | |
| 474 case 'b': | |
| 475 decoded_str.push_back('\b'); | |
| 476 break; | |
| 477 case 'f': | |
| 478 decoded_str.push_back('\f'); | |
| 479 break; | |
| 480 case 'n': | |
| 481 decoded_str.push_back('\n'); | |
| 482 break; | |
| 483 case 'r': | |
| 484 decoded_str.push_back('\r'); | |
| 485 break; | |
| 486 case 't': | |
| 487 decoded_str.push_back('\t'); | |
| 488 break; | |
| 489 case 'v': | |
| 490 decoded_str.push_back('\v'); | |
| 491 break; | |
| 492 | |
| 493 case 'x': { | |
| 494 if (i + 2 >= token.length) | |
| 495 return NULL; | |
| 496 int hex_digit = 0; | |
| 497 if (!HexStringToInt(StringPiece(token.begin + i + 1, 2), &hex_digit)) | |
| 498 return NULL; | |
| 499 decoded_str.push_back(hex_digit); | |
| 500 i += 2; | |
| 501 break; | |
| 502 } | |
| 503 case 'u': | |
| 504 if (!ConvertUTF16Units(token, &i, &decoded_str)) | |
| 505 return NULL; | |
| 506 break; | |
| 507 | |
| 508 default: | |
| 509 // We should only have valid strings at this point. If not, | |
| 510 // ParseStringToken didn't do its job. | |
| 511 NOTREACHED(); | |
| 512 return NULL; | |
| 513 } | |
| 514 } else { | |
| 515 // Not escaped | |
| 516 decoded_str.push_back(c); | |
| 517 } | |
| 518 } | |
| 519 return Value::CreateStringValue(decoded_str); | |
| 520 } | |
| 521 | |
| 522 bool JSONReader::ConvertUTF16Units(const Token& token, | |
| 523 int* i, | |
| 524 std::string* dest_string) { | |
| 525 if (*i + 4 >= token.length) | |
| 526 return false; | |
| 527 | |
| 528 // This is a 32-bit field because the shift operations in the | |
| 529 // conversion process below cause MSVC to error about "data loss." | |
| 530 // This only stores UTF-16 code units, though. | |
| 531 // Consume the UTF-16 code unit, which may be a high surrogate. | |
| 532 int code_unit16_high = 0; | |
| 533 if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_high)) | |
| 534 return false; | |
| 535 *i += 4; | |
| 536 | |
| 537 // If this is a high surrogate, consume the next code unit to get the | |
| 538 // low surrogate. | |
| 539 int code_unit16_low = 0; | |
| 540 if (CBU16_IS_SURROGATE(code_unit16_high)) { | |
| 541 // Make sure this is the high surrogate. If not, it's an encoding | |
| 542 // error. | |
| 543 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) | |
| 544 return false; | |
| 545 | |
| 546 // Make sure that the token has more characters to consume the | |
| 547 // lower surrogate. | |
| 548 if (*i + 6 >= token.length) | |
| 549 return false; | |
| 550 if (*(++(*i) + token.begin) != '\\' || *(++(*i) + token.begin) != 'u') | |
| 551 return false; | |
| 552 | |
| 553 if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_low)) | |
| 554 return false; | |
| 555 *i += 4; | |
| 556 if (!CBU16_IS_SURROGATE(code_unit16_low) || | |
| 557 !CBU16_IS_TRAIL(code_unit16_low)) { | |
| 558 return false; | |
| 559 } | |
| 560 } else if (!CBU16_IS_SINGLE(code_unit16_high)) { | |
| 561 // If this is not a code point, it's an encoding error. | |
| 562 return false; | |
| 563 } | |
| 564 | |
| 565 // Convert the UTF-16 code units to a code point and then to a UTF-8 | |
| 566 // code unit sequence. | |
| 567 char code_point[8] = { 0 }; | |
| 568 size_t offset = 0; | |
| 569 if (!code_unit16_low) { | |
| 570 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high); | |
| 571 } else { | |
| 572 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high, | |
| 573 code_unit16_low); | |
| 574 offset = 0; | |
| 575 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32); | |
| 576 } | |
| 577 dest_string->append(code_point); | |
| 578 return true; | |
| 579 } | |
| 580 | |
| 581 JSONReader::Token JSONReader::ParseToken() { | |
| 582 EatWhitespaceAndComments(); | |
| 583 | |
| 584 Token token(Token::INVALID_TOKEN, 0, 0); | |
| 585 switch (*json_pos_) { | |
| 586 case '\0': | |
| 587 token.type = Token::END_OF_INPUT; | |
| 588 break; | |
| 589 | |
| 590 case 'n': | |
| 591 if (NextStringMatch(kNullString, arraysize(kNullString) - 1)) | |
| 592 token = Token(Token::NULL_TOKEN, json_pos_, 4); | |
| 593 break; | |
| 594 | |
| 595 case 't': | |
| 596 if (NextStringMatch(kTrueString, arraysize(kTrueString) - 1)) | |
| 597 token = Token(Token::BOOL_TRUE, json_pos_, 4); | |
| 598 break; | |
| 599 | |
| 600 case 'f': | |
| 601 if (NextStringMatch(kFalseString, arraysize(kFalseString) - 1)) | |
| 602 token = Token(Token::BOOL_FALSE, json_pos_, 5); | |
| 603 break; | |
| 604 | |
| 605 case '[': | |
| 606 token = Token(Token::ARRAY_BEGIN, json_pos_, 1); | |
| 607 break; | |
| 608 | |
| 609 case ']': | |
| 610 token = Token(Token::ARRAY_END, json_pos_, 1); | |
| 611 break; | |
| 612 | |
| 613 case ',': | |
| 614 token = Token(Token::LIST_SEPARATOR, json_pos_, 1); | |
| 615 break; | |
| 616 | |
| 617 case '{': | |
| 618 token = Token(Token::OBJECT_BEGIN, json_pos_, 1); | |
| 619 break; | |
| 620 | |
| 621 case '}': | |
| 622 token = Token(Token::OBJECT_END, json_pos_, 1); | |
| 623 break; | |
| 624 | |
| 625 case ':': | |
| 626 token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1); | |
| 627 break; | |
| 628 | |
| 629 case '0': | |
| 630 case '1': | |
| 631 case '2': | |
| 632 case '3': | |
| 633 case '4': | |
| 634 case '5': | |
| 635 case '6': | |
| 636 case '7': | |
| 637 case '8': | |
| 638 case '9': | |
| 639 case '-': | |
| 640 token = ParseNumberToken(); | |
| 641 break; | |
| 642 | |
| 643 case '"': | |
| 644 token = ParseStringToken(); | |
| 645 break; | |
| 646 } | |
| 647 return token; | |
| 648 } | |
| 649 | |
| 650 void JSONReader::EatWhitespaceAndComments() { | |
| 651 while (json_pos_ != end_pos_) { | |
| 652 switch (*json_pos_) { | |
| 653 case ' ': | |
| 654 case '\n': | |
| 655 case '\r': | |
| 656 case '\t': | |
| 657 ++json_pos_; | |
| 658 break; | |
| 659 case '/': | |
| 660 // TODO(tc): This isn't in the RFC so it should be a parser flag. | |
| 661 if (!EatComment()) | |
| 662 return; | |
| 663 break; | |
| 664 default: | |
| 665 // Not a whitespace char, just exit. | |
| 666 return; | |
| 667 } | |
| 668 } | |
| 669 } | |
| 670 | |
| 671 bool JSONReader::EatComment() { | |
| 672 if ('/' != *json_pos_) | |
| 673 return false; | |
| 674 | |
| 675 char next_char = *(json_pos_ + 1); | |
| 676 if ('/' == next_char) { | |
| 677 // Line comment, read until \n or \r | |
| 678 json_pos_ += 2; | |
| 679 while (json_pos_ != end_pos_) { | |
| 680 switch (*json_pos_) { | |
| 681 case '\n': | |
| 682 case '\r': | |
| 683 ++json_pos_; | |
| 684 return true; | |
| 685 default: | |
| 686 ++json_pos_; | |
| 687 } | |
| 688 } | |
| 689 } else if ('*' == next_char) { | |
| 690 // Block comment, read until */ | |
| 691 json_pos_ += 2; | |
| 692 while (json_pos_ != end_pos_) { | |
| 693 if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) { | |
| 694 json_pos_ += 2; | |
| 695 return true; | |
| 696 } | |
| 697 ++json_pos_; | |
| 698 } | |
| 699 } else { | |
| 700 return false; | |
| 701 } | |
| 702 return true; | |
| 703 } | |
| 704 | |
| 705 bool JSONReader::NextStringMatch(const char* str, size_t length) { | |
| 706 return strncmp(json_pos_, str, length) == 0; | |
| 707 } | |
| 708 | |
| 709 void JSONReader::SetErrorCode(JsonParseError error, | |
| 710 const char* error_pos) { | |
| 711 int line_number = 1; | |
| 712 int column_number = 1; | |
| 713 | |
| 714 // Figure out the line and column the error occurred at. | |
| 715 for (const char* pos = start_pos_; pos != error_pos; ++pos) { | |
| 716 if (pos > end_pos_) { | |
| 717 NOTREACHED(); | |
| 718 return; | |
| 719 } | |
| 720 | |
| 721 if (*pos == '\n') { | |
| 722 ++line_number; | |
| 723 column_number = 1; | |
| 724 } else { | |
| 725 ++column_number; | |
| 726 } | |
| 727 } | |
| 728 | |
| 729 error_line_ = line_number; | |
| 730 error_col_ = column_number; | |
| 731 error_code_ = error; | |
| 732 } | 108 } |
| 733 | 109 |
| 734 } // namespace base | 110 } // namespace base |
| OLD | NEW |