Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/json/json_parser.h" | |
| 6 | |
| 7 #include "base/float_util.h" | |
| 8 #include "base/logging.h" | |
| 9 #include "base/memory/scoped_ptr.h" | |
| 10 #include "base/string_number_conversions.h" | |
| 11 #include "base/string_util.h" | |
| 12 #include "base/stringprintf.h" | |
| 13 #include "base/third_party/icu/icu_utf.h" | |
| 14 #include "base/utf_string_conversion_utils.h" | |
| 15 #include "base/utf_string_conversions.h" | |
| 16 #include "base/values.h" | |
| 17 | |
| 18 namespace base { | |
| 19 namespace internal { | |
| 20 | |
| 21 namespace { | |
| 22 | |
| 23 const int kStackMaxDepth = 100; | |
| 24 | |
| 25 const int32 kExtendedASCIIStart = 0x80; | |
| 26 | |
| 27 // This and the class below are used to own the JSON input string for when | |
| 28 // string tokens are stored as StringPiece instead of std::string. This | |
| 29 // optimization avoids about 2/3rds of string memory copies. The constructor | |
| 30 // takes the input string and swaps its data into the new instance. The real | |
| 31 // root value is also Swap()ed into the new instance. | |
| 32 class DictionaryHiddenRootValue : public base::DictionaryValue { | |
| 33 public: | |
| 34 DictionaryHiddenRootValue(std::string* json, Value* root) { | |
| 35 DCHECK(root->IsType(Value::TYPE_DICTIONARY)); | |
| 36 DictionaryValue::Swap(static_cast<DictionaryValue*>(root)); | |
| 37 json->swap(json_); | |
| 38 } | |
| 39 | |
| 40 virtual void Swap(DictionaryValue* other) OVERRIDE { | |
| 41 DLOG(1) << "Swap()ing a DictionaryValue inefficiently."; | |
|
Mark Mentovai
2012/05/08 20:19:41
DLOG(1) is DLOG(WARNING). You either meant that or
Robert Sesek
2012/05/15 16:57:51
I've said it before, but it bears repeating: we ha
| |
| 42 | |
| 43 // First deep copy to convert JSONStringValue to std::string and swap that | |
| 44 // copy with |other|, which contains the new contents of |this|. | |
| 45 scoped_ptr<base::DictionaryValue> copy(DeepCopy()); | |
| 46 copy->Swap(other); | |
| 47 | |
| 48 // Then erase the contents of the current dictionary and swap in the | |
| 49 // new contents, originally from |other|. | |
| 50 Clear(); | |
| 51 json_.clear(); | |
| 52 DictionaryValue::Swap(copy.get()); | |
| 53 } | |
| 54 | |
| 55 // Not overriding DictionaryValue::Remove because it just calls through to | |
| 56 // the method below. | |
| 57 | |
| 58 virtual bool RemoveWithoutPathExpansion(const std::string& key, | |
| 59 Value** out) OVERRIDE { | |
| 60 // If the caller won't take ownership of the removed value, just call up. | |
| 61 if (!out) | |
| 62 return DictionaryValue::RemoveWithoutPathExpansion(key, out); | |
| 63 | |
| 64 DLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; | |
| 65 | |
| 66 // Otherwise, remove the value while its still "owned" by this and copy it | |
| 67 // to convert any JSONStringValues to std::string. | |
| 68 Value* out_owned = NULL; | |
| 69 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned)) | |
| 70 return false; | |
| 71 | |
| 72 *out = out_owned->DeepCopy(); | |
| 73 delete out_owned; | |
| 74 | |
| 75 return true; | |
| 76 } | |
| 77 | |
| 78 private: | |
| 79 std::string json_; | |
| 80 | |
| 81 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); | |
| 82 }; | |
| 83 | |
| 84 class ListHiddenRootValue : public base::ListValue { | |
| 85 public: | |
| 86 ListHiddenRootValue(std::string* json, Value* root) { | |
| 87 DCHECK(root->IsType(Value::TYPE_LIST)); | |
| 88 ListValue::Swap(static_cast<ListValue*>(root)); | |
| 89 json->swap(json_); | |
| 90 } | |
| 91 | |
| 92 virtual void Swap(ListValue* other) OVERRIDE { | |
| 93 DLOG(1) << "Swap()ing a ListValue inefficiently."; | |
| 94 | |
| 95 // First deep copy to convert JSONStringValue to std::string and swap that | |
| 96 // copy with |other|, which contains the new contents of |this|. | |
| 97 scoped_ptr<base::ListValue> copy(DeepCopy()); | |
| 98 copy->Swap(other); | |
| 99 | |
| 100 // Then erase the contents of the current list and swap in the new contents, | |
| 101 // originally from |other|. | |
| 102 Clear(); | |
| 103 json_.clear(); | |
| 104 ListValue::Swap(copy.get()); | |
| 105 } | |
| 106 | |
| 107 virtual bool Remove(size_t index, Value** out) OVERRIDE { | |
| 108 // If the caller won't take ownership of the removed value, just call up. | |
| 109 if (!out) | |
| 110 return ListValue::Remove(index, out); | |
| 111 | |
| 112 DLOG(1) << "Remove()ing from a ListValue inefficiently."; | |
| 113 | |
| 114 // Otherwise, remove the value while its still "owned" by this and copy it | |
| 115 // to convert any JSONStringValues to std::string. | |
| 116 Value* out_owned = NULL; | |
| 117 if (!ListValue::Remove(index, &out_owned)) | |
| 118 return false; | |
| 119 | |
| 120 *out = out_owned->DeepCopy(); | |
| 121 delete out_owned; | |
| 122 | |
| 123 return true; | |
| 124 } | |
| 125 | |
| 126 private: | |
| 127 std::string json_; | |
| 128 | |
| 129 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); | |
| 130 }; | |
| 131 | |
| 132 // A variant on StringValue that uses StringPiece instead of copying the string | |
| 133 // into the Value. This can only be stored in a child of hidden root (above), | |
| 134 // otherwise the referenced string will not be guaranteed to outlive it. | |
| 135 class JSONStringValue : public base::Value { | |
| 136 public: | |
| 137 explicit JSONStringValue(const base::StringPiece& piece) | |
| 138 : Value(TYPE_STRING), | |
| 139 string_piece_(piece) { | |
| 140 } | |
| 141 | |
| 142 // Value: | |
| 143 bool GetAsString(std::string* out_value) const OVERRIDE { | |
| 144 string_piece_.CopyToString(out_value); | |
| 145 return true; | |
| 146 } | |
| 147 bool GetAsString(string16* out_value) const OVERRIDE { | |
| 148 *out_value = UTF8ToUTF16(string_piece_); | |
| 149 return true; | |
| 150 } | |
| 151 virtual Value* DeepCopy() const OVERRIDE { | |
| 152 return Value::CreateStringValue(string_piece_.as_string()); | |
| 153 } | |
| 154 virtual bool Equals(const Value* other) const OVERRIDE { | |
| 155 std::string other_string; | |
| 156 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && | |
| 157 StringPiece(other_string) == string_piece_; | |
| 158 } | |
| 159 | |
| 160 private: | |
| 161 // The location in the original input stream. | |
| 162 base::StringPiece string_piece_; | |
| 163 | |
| 164 DISALLOW_COPY_AND_ASSIGN(JSONStringValue); | |
| 165 }; | |
| 166 | |
| 167 // Simple class that checks for maximum recursion/"stack overflow." | |
| 168 class StackMarker { | |
| 169 public: | |
| 170 explicit StackMarker(int* depth) : depth_(depth) { | |
| 171 ++(*depth_); | |
| 172 } | |
|
Mark Mentovai
2012/05/08 20:19:41
You should (D)CHECK here that depth <= kStackMaxDe
Robert Sesek
2012/05/15 16:57:51
Done.
| |
| 173 ~StackMarker() { | |
| 174 --(*depth_); | |
| 175 } | |
| 176 | |
| 177 bool IsTooDeep() const { | |
| 178 return *depth_ >= kStackMaxDepth; | |
| 179 } | |
| 180 | |
| 181 private: | |
| 182 int* const depth_; | |
| 183 | |
| 184 DISALLOW_COPY_AND_ASSIGN(StackMarker); | |
| 185 }; | |
| 186 | |
| 187 } // namespace | |
| 188 | |
| 189 JSONParser::JSONParser(int options) | |
| 190 : options_(options), | |
| 191 start_pos_(NULL), | |
| 192 pos_(0), | |
|
tfarina
2012/05/04 00:25:28
nit: just curious why did you choose 0 to initiali
Robert Sesek
2012/05/15 16:57:51
Done.
| |
| 193 end_pos_(0), | |
| 194 index_(0), | |
| 195 stack_depth_(0), | |
| 196 line_number_(0), | |
| 197 index_last_line_(0), | |
| 198 error_code_(JSONReader::JSON_NO_ERROR), | |
| 199 error_line_(0), | |
| 200 error_column_(0) { | |
| 201 } | |
| 202 | |
| 203 JSONParser::~JSONParser() { | |
| 204 } | |
| 205 | |
| 206 Value* JSONParser::Parse(const std::string& input) { | |
| 207 // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix | |
| 208 // <http://crbug.com/126107> when my Windows box arrives. | |
| 209 #if defined(OS_WIN) | |
| 210 options_ |= JSON_DETACHABLE_CHILDREN; | |
| 211 #endif | |
| 212 | |
| 213 std::string input_copy; | |
| 214 // If the children of a JSON root can be detached, then hidden roots cannot | |
| 215 // be used, so do not bother copying the input because StringPiece will not | |
| 216 // be used anywhere. | |
| 217 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { | |
| 218 input_copy = input; | |
| 219 start_pos_ = input_copy.data(); | |
| 220 } else { | |
| 221 start_pos_ = input.data(); | |
| 222 } | |
| 223 pos_ = start_pos_; | |
| 224 end_pos_ = start_pos_ + input.length(); | |
| 225 index_ = 0; | |
| 226 line_number_ = 1; | |
| 227 index_last_line_ = 0; | |
| 228 | |
| 229 error_code_ = JSONReader::JSON_NO_ERROR; | |
| 230 error_line_ = 0; | |
| 231 error_column_ = 0; | |
| 232 | |
| 233 // When the input JSON string starts with a UTF-8 Byte-Order-Mark | |
| 234 // <0xEF 0xBB 0xBF>, advance the start position to avoid the | |
| 235 // ParseNextToken function mis-treating a Unicode BOM as an invalid | |
| 236 // character and returning NULL. | |
| 237 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF && | |
| 238 static_cast<uint8>(*(pos_ + 1)) == 0xBB && | |
| 239 static_cast<uint8>(*(pos_ + 2)) == 0xBF) { | |
| 240 NextNChars(3); | |
| 241 } | |
| 242 | |
| 243 // Parse the first and all subsequent tokens. | |
| 244 scoped_ptr<Value> root(ParseNextToken()); | |
| 245 if (!root.get()) | |
| 246 return NULL; | |
| 247 | |
| 248 // Make sure the input stream is at an end. | |
| 249 if (GetNextToken() != T_END_OF_INPUT) { | |
| 250 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { | |
| 251 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); | |
| 252 return NULL; | |
| 253 } | |
| 254 } | |
| 255 | |
| 256 // Dictionaries and lists can contain JSONStringValues, so wrap them in a | |
| 257 // hidden root. | |
| 258 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { | |
| 259 if (root->IsType(Value::TYPE_DICTIONARY)) { | |
| 260 return new DictionaryHiddenRootValue(&input_copy, root.release()); | |
| 261 } else if (root->IsType(Value::TYPE_LIST)) { | |
| 262 return new ListHiddenRootValue(&input_copy, root.release()); | |
| 263 } else if (root->IsType(Value::TYPE_STRING)) { | |
| 264 // A string type could be a JSONStringValue, but because there's no | |
| 265 // corresponding HiddenRootValue, the memory will be lost. Deep copy to | |
| 266 // preserve it. | |
| 267 return root->DeepCopy(); | |
| 268 } | |
| 269 } | |
| 270 | |
| 271 // All other values can be returned directly. | |
| 272 return root.release(); | |
| 273 } | |
| 274 | |
| 275 JSONReader::JsonParseError JSONParser::error_code() const { | |
| 276 return error_code_; | |
| 277 } | |
| 278 | |
| 279 std::string JSONParser::GetErrorMessage() const { | |
| 280 return FormatErrorMessage(error_line_, error_column_, | |
| 281 JSONReader::ErrorCodeToString(error_code_)); | |
| 282 } | |
| 283 | |
| 284 // StringBuilder /////////////////////////////////////////////////////////////// | |
| 285 | |
| 286 JSONParser::StringBuilder::StringBuilder() | |
| 287 : pos_(NULL), | |
| 288 length_(0), | |
| 289 string_(NULL) { | |
| 290 } | |
| 291 | |
| 292 JSONParser::StringBuilder::StringBuilder(const char* pos) | |
| 293 : pos_(pos), | |
| 294 length_(0), | |
| 295 string_(NULL) { | |
| 296 } | |
| 297 | |
| 298 void JSONParser::StringBuilder::Swap(StringBuilder* other) { | |
| 299 std::swap(other->string_, string_); | |
| 300 std::swap(other->pos_, pos_); | |
| 301 std::swap(other->length_, length_); | |
| 302 } | |
| 303 | |
| 304 JSONParser::StringBuilder::~StringBuilder() { | |
| 305 delete string_; | |
| 306 } | |
| 307 | |
| 308 void JSONParser::StringBuilder::Append(const char& c) { | |
| 309 DCHECK_GE(c, 0); | |
|
Mark Mentovai
2012/05/08 20:19:41
Because of the stupid nature of char, you should b
Robert Sesek
2012/05/15 16:57:51
How would you do this?
| |
| 310 DCHECK_LT(c, 128); | |
| 311 | |
| 312 if (string_) | |
| 313 string_->push_back(c); | |
| 314 else | |
| 315 ++length_; | |
| 316 } | |
| 317 | |
| 318 void JSONParser::StringBuilder::AppendString(const std::string& str) { | |
| 319 DCHECK(string_); | |
| 320 string_->append(str); | |
| 321 } | |
| 322 | |
| 323 void JSONParser::StringBuilder::Convert() { | |
| 324 if (string_) | |
| 325 return; | |
| 326 string_ = new std::string(pos_, length_); | |
| 327 } | |
| 328 | |
| 329 bool JSONParser::StringBuilder::CanBeStringPiece() const { | |
| 330 return !string_; | |
| 331 } | |
| 332 | |
| 333 StringPiece JSONParser::StringBuilder::AsStringPiece() { | |
| 334 if (string_) | |
| 335 return StringPiece(); | |
| 336 return StringPiece(pos_, length_); | |
| 337 } | |
| 338 | |
| 339 const std::string& JSONParser::StringBuilder::AsString() { | |
| 340 if (!string_) | |
| 341 Convert(); | |
| 342 return *string_; | |
| 343 } | |
| 344 | |
| 345 // JSONParser private ////////////////////////////////////////////////////////// | |
| 346 | |
| 347 inline bool JSONParser::CanConsume(int length) { | |
| 348 return pos_ + length <= end_pos_; | |
| 349 } | |
| 350 | |
| 351 const char* JSONParser::NextChar() { | |
| 352 DCHECK(CanConsume(1)); | |
| 353 ++index_; | |
| 354 ++pos_; | |
| 355 return pos_; | |
| 356 } | |
| 357 | |
| 358 void JSONParser::NextNChars(int n) { | |
| 359 DCHECK(CanConsume(n)); | |
| 360 index_ += n; | |
| 361 pos_ += n; | |
| 362 } | |
| 363 | |
| 364 JSONParser::Token JSONParser::GetNextToken() { | |
| 365 EatWhitespaceAndComments(); | |
| 366 if (!CanConsume(1)) | |
| 367 return T_END_OF_INPUT; | |
| 368 | |
| 369 switch (*pos_) { | |
| 370 case '{': | |
| 371 return T_OBJECT_BEGIN; | |
| 372 case '}': | |
| 373 return T_OBJECT_END; | |
| 374 case '[': | |
| 375 return T_ARRAY_BEGIN; | |
| 376 case ']': | |
| 377 return T_ARRAY_END; | |
| 378 case '"': | |
| 379 return T_STRING; | |
| 380 case '0': | |
| 381 case '1': | |
| 382 case '2': | |
| 383 case '3': | |
| 384 case '4': | |
| 385 case '5': | |
| 386 case '6': | |
| 387 case '7': | |
| 388 case '8': | |
| 389 case '9': | |
| 390 case '-': | |
| 391 return T_NUMBER; | |
| 392 case 't': | |
| 393 return T_BOOL_TRUE; | |
| 394 case 'f': | |
| 395 return T_BOOL_FALSE; | |
| 396 case 'n': | |
| 397 return T_NULL; | |
| 398 case ',': | |
| 399 return T_LIST_SEPARATOR; | |
| 400 case ':': | |
| 401 return T_OBJECT_PAIR_SEPARATOR; | |
| 402 default: | |
| 403 return T_INVALID_TOKEN; | |
| 404 } | |
| 405 } | |
| 406 | |
| 407 void JSONParser::EatWhitespaceAndComments() { | |
| 408 while (pos_ < end_pos_) { | |
| 409 switch (*pos_) { | |
| 410 case '\r': | |
| 411 case '\n': | |
| 412 index_last_line_ = index_; | |
| 413 ++line_number_; | |
| 414 // Fall through. | |
| 415 case ' ': | |
| 416 case '\t': | |
| 417 NextChar(); | |
| 418 break; | |
| 419 case '/': | |
| 420 if (!EatComment()) | |
| 421 return; | |
| 422 break; | |
| 423 default: | |
| 424 return; | |
| 425 } | |
| 426 } | |
| 427 } | |
| 428 | |
| 429 bool JSONParser::EatComment() { | |
| 430 if (*pos_ != '/' || !CanConsume(1)) | |
| 431 return false; | |
| 432 | |
| 433 char next_char = *NextChar(); | |
| 434 if (next_char == '/') { | |
| 435 // Single line comment, read to newline. | |
| 436 while (CanConsume(1)) { | |
| 437 char next_char = *NextChar(); | |
| 438 if (next_char == '\n' || next_char == '\r') | |
| 439 return true; | |
| 440 } | |
| 441 } else if (next_char == '*') { | |
| 442 // Block comment, read until end marker. | |
| 443 while (CanConsume(2)) { | |
| 444 if (*NextChar() == '*' && *NextChar() == '/') { | |
|
Mark Mentovai
2012/05/08 20:19:41
This eats two characters at a time in a loop, so t
Robert Sesek
2012/05/15 16:57:51
Isn't that what's happening? The operator there is
| |
| 445 // EatWhitespaceAndComments will inspect pos_, which will still be on | |
| 446 // the last / of the comment, so advance once more (which may also be | |
| 447 // end of input). | |
| 448 NextChar(); | |
| 449 return true; | |
| 450 } | |
| 451 } | |
|
Mark Mentovai
2012/05/08 20:19:41
If the /* is unterminated and you reach the end of
Robert Sesek
2012/05/15 16:57:51
Done.
| |
| 452 } | |
| 453 | |
| 454 return false; | |
| 455 } | |
| 456 | |
| 457 Value* JSONParser::ParseNextToken() { | |
| 458 return ParseToken(GetNextToken()); | |
| 459 } | |
| 460 | |
| 461 Value* JSONParser::ParseToken(Token token) { | |
| 462 switch (token) { | |
| 463 case T_OBJECT_BEGIN: | |
| 464 return ConsumeDictionary(); | |
| 465 case T_ARRAY_BEGIN: | |
| 466 return ConsumeList(); | |
| 467 case T_STRING: | |
| 468 return ConsumeString(); | |
| 469 case T_NUMBER: | |
| 470 return ConsumeNumber(); | |
| 471 case T_BOOL_TRUE: | |
| 472 case T_BOOL_FALSE: | |
| 473 case T_NULL: | |
| 474 return ConsumeLiteral(); | |
| 475 default: | |
| 476 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
| 477 return NULL; | |
| 478 } | |
| 479 } | |
| 480 | |
| 481 Value* JSONParser::ConsumeDictionary() { | |
| 482 if (*pos_ != '{') { | |
| 483 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
| 484 return NULL; | |
| 485 } | |
| 486 | |
| 487 StackMarker depth_check(&stack_depth_); | |
| 488 if (depth_check.IsTooDeep()) { | |
| 489 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); | |
| 490 return NULL; | |
| 491 } | |
| 492 | |
| 493 scoped_ptr<DictionaryValue> dict(new DictionaryValue); | |
| 494 | |
| 495 NextChar(); | |
| 496 Token token = GetNextToken(); | |
| 497 while (token != T_OBJECT_END) { | |
| 498 if (token != T_STRING) { | |
| 499 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); | |
| 500 return NULL; | |
| 501 } | |
| 502 | |
| 503 // First consume the key. | |
| 504 StringBuilder key; | |
| 505 if (!ConsumeStringRaw(&key)) { | |
| 506 return NULL; | |
| 507 } | |
| 508 | |
| 509 // Read the separator. | |
| 510 NextChar(); | |
| 511 token = GetNextToken(); | |
| 512 if (token != T_OBJECT_PAIR_SEPARATOR) { | |
| 513 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 514 return NULL; | |
| 515 } | |
| 516 | |
| 517 // The token is the value. Ownership transfers to |dict|. | |
| 518 NextChar(); | |
| 519 Value* value = ParseNextToken(); | |
| 520 if (!value) { | |
| 521 return NULL; | |
| 522 } | |
| 523 | |
| 524 dict->SetWithoutPathExpansion(key.AsString(), value); | |
| 525 | |
| 526 NextChar(); | |
| 527 token = GetNextToken(); | |
| 528 if (token == T_LIST_SEPARATOR) { | |
| 529 NextChar(); | |
| 530 token = GetNextToken(); | |
| 531 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { | |
| 532 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); | |
| 533 return NULL; | |
| 534 } | |
| 535 } else if (token != T_OBJECT_END) { | |
| 536 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); | |
| 537 return NULL; | |
| 538 } | |
| 539 } | |
| 540 | |
| 541 if (token != T_OBJECT_END) | |
| 542 return NULL; | |
| 543 | |
| 544 return dict.release(); | |
| 545 } | |
| 546 | |
| 547 Value* JSONParser::ConsumeList() { | |
| 548 if (*pos_ != '[') { | |
| 549 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
| 550 return NULL; | |
| 551 } | |
| 552 | |
| 553 StackMarker depth_check(&stack_depth_); | |
| 554 if (depth_check.IsTooDeep()) { | |
| 555 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); | |
| 556 return NULL; | |
| 557 } | |
| 558 | |
| 559 scoped_ptr<ListValue> list(new ListValue); | |
| 560 | |
| 561 NextChar(); | |
| 562 Token token = GetNextToken(); | |
| 563 while (token != T_ARRAY_END) { | |
| 564 Value* item = ParseToken(token); | |
| 565 if (!item) { | |
| 566 // ReportError from deeper level. | |
| 567 return NULL; | |
| 568 } | |
| 569 | |
| 570 list->Append(item); | |
| 571 | |
| 572 NextChar(); | |
| 573 token = GetNextToken(); | |
| 574 if (token == T_LIST_SEPARATOR) { | |
| 575 NextChar(); | |
| 576 token = GetNextToken(); | |
| 577 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { | |
| 578 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); | |
| 579 return NULL; | |
| 580 } | |
| 581 } else if (token != T_ARRAY_END) { | |
| 582 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 583 return NULL; | |
| 584 } | |
| 585 } | |
| 586 | |
| 587 if (token != T_ARRAY_END) | |
| 588 return NULL; | |
| 589 | |
| 590 return list.release(); | |
| 591 } | |
| 592 | |
| 593 Value* JSONParser::ConsumeString() { | |
| 594 StringBuilder string; | |
| 595 if (!ConsumeStringRaw(&string)) | |
| 596 return NULL; | |
| 597 | |
| 598 // Create the Value representation, either using a hidden root, if configured | |
| 599 // to do so, and the string can be represented by StringPiece. | |
| 600 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) { | |
| 601 return new JSONStringValue(string.AsStringPiece()); | |
| 602 } else { | |
| 603 if (string.CanBeStringPiece()) | |
| 604 string.Convert(); | |
| 605 return new StringValue(string.AsString()); | |
| 606 } | |
| 607 } | |
| 608 | |
| 609 bool JSONParser::ConsumeStringRaw(StringBuilder* out) { | |
| 610 if (*pos_ != '"') { | |
| 611 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
| 612 return false; | |
| 613 } | |
| 614 | |
| 615 // StringBuilder will internally build a StringPiece unless a UTF-16 | |
| 616 // conversion occurs, at which point it will perform a copy into a | |
| 617 // std::string. | |
| 618 StringBuilder string(NextChar()); | |
| 619 | |
| 620 int length = end_pos_ - start_pos_; | |
| 621 int32 next_char = 0; | |
| 622 | |
| 623 DCHECK_EQ(*pos_, *(start_pos_ + index_)); | |
|
Mark Mentovai
2012/05/08 20:19:41
Why the *s?
Robert Sesek
2012/05/15 16:57:51
Debugging code removed.
| |
| 624 | |
| 625 while (CanConsume(1)) { | |
| 626 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. | |
| 627 CBU8_NEXT(start_pos_, index_, length, next_char); | |
| 628 if (next_char < 0 || !IsValidCharacter(next_char)) { | |
| 629 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); | |
| 630 return false; | |
| 631 } | |
| 632 | |
| 633 // If this character is an escape sequence... | |
| 634 if (next_char == '\\') { | |
| 635 // The input string will be adjusted (either by combining the two | |
| 636 // characters of an encoded escape sequence, or with a UTF conversion), | |
| 637 // so using StringPiece isn't possible -- force a conversion. | |
| 638 string.Convert(); | |
| 639 | |
| 640 if (!CanConsume(1)) { | |
| 641 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); | |
| 642 return false; | |
| 643 } | |
| 644 | |
| 645 switch (*NextChar()) { | |
| 646 // Allowed esape sequences: | |
| 647 case 'x': { // UTF-8 sequence. | |
| 648 if (!CanConsume(2)) { | |
| 649 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); | |
| 650 return false; | |
| 651 } | |
| 652 | |
| 653 int hex_digit = 0; | |
| 654 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { | |
| 655 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); | |
| 656 return false; | |
| 657 } | |
| 658 NextChar(); | |
| 659 | |
| 660 if (hex_digit < kExtendedASCIIStart) | |
| 661 string.Append(hex_digit); | |
| 662 else | |
| 663 DecodeUTF8(hex_digit, &string); | |
|
Mark Mentovai
2012/05/08 20:19:41
How is this supposed to work? Why don’t I see it i
Robert Sesek
2012/05/15 16:57:51
Documented. I don't want to remove this now (witho
| |
| 664 break; | |
| 665 } | |
| 666 case 'u': { // UTF-16 sequence. | |
| 667 // UTF units are of the form \uXXXX. | |
| 668 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. | |
| 669 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); | |
| 670 return false; | |
| 671 } | |
| 672 | |
| 673 // Skip the 'u'. | |
| 674 NextChar(); | |
| 675 | |
| 676 std::string utf8_units; | |
| 677 if (!DecodeUTF16(&utf8_units)) { | |
| 678 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); | |
| 679 return false; | |
| 680 } | |
| 681 | |
| 682 string.AppendString(utf8_units); | |
| 683 break; | |
| 684 } | |
| 685 case '"': | |
| 686 string.Append('"'); | |
| 687 break; | |
| 688 case '\\': | |
| 689 string.Append('\\'); | |
| 690 break; | |
| 691 case '/': | |
| 692 string.Append('/'); | |
| 693 break; | |
| 694 case 'b': | |
| 695 string.Append('\b'); | |
| 696 break; | |
| 697 case 'f': | |
| 698 string.Append('\f'); | |
| 699 break; | |
| 700 case 'n': | |
| 701 string.Append('\n'); | |
| 702 break; | |
| 703 case 'r': | |
| 704 string.Append('\r'); | |
| 705 break; | |
| 706 case 't': | |
| 707 string.Append('\t'); | |
| 708 break; | |
| 709 case 'v': // Not listed as valid escape sequence in the RFC. | |
| 710 string.Append('\v'); | |
| 711 break; | |
| 712 // All other escape squences are illegal. | |
| 713 default: | |
| 714 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); | |
| 715 return false; | |
| 716 } | |
| 717 } else if (next_char == '"') { | |
| 718 --index_; // Rewind by one because of CBU8_NEXT. | |
| 719 out->Swap(&string); | |
| 720 return true; | |
| 721 } else { | |
| 722 if (next_char < kExtendedASCIIStart) | |
| 723 string.Append(next_char); | |
| 724 else | |
| 725 DecodeUTF8(next_char, &string); | |
| 726 } | |
| 727 } | |
| 728 | |
| 729 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); | |
| 730 return false; | |
| 731 } | |
| 732 | |
| 733 // Entry is at the first X in \uXXXX. | |
| 734 bool JSONParser::DecodeUTF16(std::string* dest_string) { | |
| 735 if (!CanConsume(4)) | |
| 736 return false; | |
| 737 | |
| 738 // This is a 32-bit field because the shift operations in the | |
| 739 // conversion process below cause MSVC to error about "data loss." | |
| 740 // This only stores UTF-16 code units, though. | |
| 741 // Consume the UTF-16 code unit, which may be a high surrogate. | |
| 742 int code_unit16_high = 0; | |
| 743 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) | |
| 744 return false; | |
| 745 | |
| 746 // Only add 3, not 4, because at the end of this iteration, the parser has | |
| 747 // finished working with the last digit of the UTF sequence, meaning that | |
| 748 // the next spin of the loop will advance to the next byte. | |
| 749 NextNChars(3); | |
| 750 | |
| 751 // If this is a high surrogate, consume the next code unit to get the | |
| 752 // low surrogate. | |
| 753 int code_unit16_low = 0; | |
| 754 if (CBU16_IS_SURROGATE(code_unit16_high)) { | |
| 755 // Make sure this is the high surrogate. If not, it's an encoding | |
| 756 // error. | |
| 757 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) | |
| 758 return false; | |
| 759 | |
| 760 // Make sure that the token has more characters to consume the | |
| 761 // lower surrogate. | |
| 762 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. | |
| 763 return false; | |
| 764 if (*NextChar() != '\\' || *NextChar() != 'u') | |
| 765 return false; | |
| 766 | |
| 767 NextChar(); // Read past 'u'. | |
| 768 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) | |
| 769 return false; | |
| 770 | |
| 771 NextNChars(3); | |
| 772 | |
| 773 if (!CBU16_IS_SURROGATE(code_unit16_low) || | |
| 774 !CBU16_IS_TRAIL(code_unit16_low)) { | |
|
Mark Mentovai
2012/05/08 20:19:41
CBU16_IS_TRAIL implies CBU16_IS_SURROGATE, you onl
Robert Sesek
2012/05/15 16:57:51
Done.
| |
| 775 return false; | |
| 776 } | |
| 777 } else if (!CBU16_IS_SINGLE(code_unit16_high)) { | |
|
Mark Mentovai
2012/05/08 20:19:41
CBU16_IS_SINGLE is defined as !CBU16_IS_SURROGATE,
Robert Sesek
2012/05/15 16:57:51
Done.
| |
| 778 // If this is not a code point, it's an encoding error. | |
| 779 return false; | |
| 780 } | |
| 781 | |
| 782 // Convert the UTF-16 code units to a code point and then to a UTF-8 | |
| 783 // code unit sequence. | |
| 784 char code_point[8] = { 0 }; | |
| 785 size_t offset = 0; | |
| 786 if (!code_unit16_low) { | |
|
Mark Mentovai
2012/05/08 20:19:41
Rather than rechecking this, why don’t you do it i
Robert Sesek
2012/05/15 16:57:51
Done.
| |
| 787 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high); | |
| 788 } else { | |
| 789 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high, | |
| 790 code_unit16_low); | |
| 791 offset = 0; | |
| 792 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32); | |
|
Mark Mentovai
2012/05/08 20:19:41
And the same for this, except you’d put it in the
Robert Sesek
2012/05/15 16:57:51
Done.
| |
| 793 } | |
| 794 dest_string->append(code_point); | |
| 795 return true; | |
| 796 } | |
| 797 | |
| 798 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) { | |
| 799 // Anything outside of the basic ASCII plane will need to be decomposed from | |
| 800 // int32 to a multi-byte sequence. | |
| 801 if (point < kExtendedASCIIStart) { | |
| 802 dest->Append(point); | |
| 803 } else { | |
| 804 char utf8_units[4] = { 0 }; | |
| 805 int offset = 0; | |
| 806 CBU8_APPEND_UNSAFE(utf8_units, offset, point); | |
| 807 dest->Convert(); | |
| 808 dest->AppendString(utf8_units); | |
| 809 } | |
| 810 } | |
| 811 | |
| 812 Value* JSONParser::ConsumeNumber() { | |
| 813 const char* num_start = pos_; | |
| 814 const int start_index = index_; | |
| 815 int end_index = start_index; | |
| 816 | |
| 817 if (*pos_ == '-') | |
| 818 NextChar(); | |
| 819 | |
| 820 if (!ReadInt(false)) { | |
| 821 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 822 return NULL; | |
| 823 } | |
| 824 end_index = index_; | |
| 825 | |
| 826 // The optional faction part. | |
|
Mark Mentovai
2012/05/08 20:19:41
fraction
Robert Sesek
2012/05/15 16:57:51
Done.
| |
| 827 if (*pos_ == '.') { | |
| 828 if (!CanConsume(1)) { | |
| 829 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 830 return NULL; | |
| 831 } | |
| 832 NextChar(); | |
| 833 if (!ReadInt(true)) { | |
| 834 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 835 return NULL; | |
| 836 } | |
| 837 end_index = index_; | |
| 838 } | |
| 839 | |
| 840 // Optional exponent part. | |
| 841 if (*pos_ == 'e' || *pos_ == 'E') { | |
| 842 NextChar(); | |
| 843 if (*pos_ == '-' || *pos_ == '+') | |
| 844 NextChar(); | |
| 845 if (!ReadInt(true)) { | |
| 846 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 847 return NULL; | |
| 848 } | |
| 849 end_index = index_; | |
| 850 } | |
| 851 | |
| 852 // ReadInt is greedy because numbers have no easily detectable sentinel, | |
| 853 // so save off where the parser should be on exit (see Consume invariant at | |
| 854 // the top of the header), then make sure the next token is one which is | |
| 855 // valid. | |
| 856 const char* exit_pos = pos_ - 1; | |
| 857 int exit_index = index_ - 1; | |
| 858 | |
| 859 switch (GetNextToken()) { | |
| 860 case T_OBJECT_END: | |
| 861 case T_ARRAY_END: | |
| 862 case T_LIST_SEPARATOR: | |
| 863 case T_END_OF_INPUT: | |
| 864 break; | |
| 865 default: | |
| 866 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 867 return NULL; | |
| 868 } | |
| 869 | |
| 870 pos_ = exit_pos; | |
| 871 index_ = exit_index; | |
| 872 | |
| 873 StringPiece num_string(num_start, end_index - start_index); | |
| 874 | |
| 875 int num_int; | |
| 876 if (StringToInt(num_string, &num_int)) | |
| 877 return Value::CreateIntegerValue(num_int); | |
| 878 | |
| 879 double num_double; | |
| 880 if (base::StringToDouble(num_string.as_string(), &num_double) && | |
| 881 IsFinite(num_double)) { | |
| 882 return Value::CreateDoubleValue(num_double); | |
| 883 } | |
| 884 | |
| 885 return NULL; | |
| 886 } | |
| 887 | |
| 888 bool JSONParser::ReadInt(bool allow_leading_zeros) { | |
| 889 char first = *pos_; | |
| 890 int len = 0; | |
| 891 | |
| 892 char c = first; | |
| 893 while (CanConsume(1) && IsAsciiDigit(c)) { | |
| 894 c = *NextChar(); | |
| 895 ++len; | |
| 896 } | |
| 897 | |
| 898 if (len == 0) | |
| 899 return false; | |
| 900 | |
| 901 if (!allow_leading_zeros && len > 1 && first == '0') | |
| 902 return false; | |
| 903 | |
| 904 return true; | |
| 905 } | |
| 906 | |
| 907 Value* JSONParser::ConsumeLiteral() { | |
| 908 switch (*pos_) { | |
| 909 case 't': | |
|
Mark Mentovai
2012/05/08 20:19:41
I’d be more comfortable having kTrueLiteral[] = "t
Robert Sesek
2012/05/15 16:57:51
Done.
| |
| 910 if (!CanConsume(3) || !StringsAreEqual(pos_, "true", 4)) { | |
| 911 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 912 return NULL; | |
| 913 } | |
| 914 NextNChars(3); | |
| 915 return Value::CreateBooleanValue(true); | |
| 916 case 'f': | |
| 917 if (!CanConsume(4) || !StringsAreEqual(pos_, "false", 5)) { | |
| 918 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 919 return NULL; | |
| 920 } | |
| 921 NextNChars(4); | |
| 922 return Value::CreateBooleanValue(false); | |
| 923 case 'n': | |
| 924 if (!CanConsume(3) || !StringsAreEqual(pos_, "null", 4)) { | |
| 925 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
| 926 return NULL; | |
| 927 } | |
| 928 NextNChars(3); | |
| 929 return Value::CreateNullValue(); | |
| 930 default: | |
| 931 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
| 932 return NULL; | |
| 933 } | |
| 934 } | |
| 935 | |
| 936 // static | |
| 937 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { | |
| 938 return strncmp(one, two, len) == 0; | |
| 939 } | |
| 940 | |
| 941 void JSONParser::ReportError(JSONReader::JsonParseError code, | |
| 942 int column_adjust) { | |
| 943 error_code_ = code; | |
| 944 error_line_ = line_number_; | |
| 945 error_column_ = index_ - index_last_line_ + column_adjust; | |
| 946 } | |
| 947 | |
| 948 // static | |
| 949 std::string JSONParser::FormatErrorMessage(int line, int column, | |
| 950 const std::string& description) { | |
| 951 if (line || column) { | |
|
Mark Mentovai
2012/05/08 20:19:41
Do you ever have !line && column, or the other way
Robert Sesek
2012/05/15 16:57:51
No, but one could be zero.
| |
| 952 return StringPrintf("Line: %i, column: %i, %s", | |
| 953 line, column, description.c_str()); | |
| 954 } | |
| 955 return description; | |
| 956 } | |
| 957 | |
| 958 } // namespace internal | |
| 959 } // namespace base | |
| OLD | NEW |