Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // A JSON parser. Converts strings of JSON into a Value object (see | 5 // A JSON parser. Converts strings of JSON into a Value object (see |
| 6 // base/values.h). | 6 // base/values.h). |
| 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 | 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 |
| 8 // | 8 // |
| 9 // Known limitations/deviations from the RFC: | 9 // Known limitations/deviations from the RFC: |
| 10 // - Only knows how to parse ints within the range of a signed 32 bit int and | 10 // - Only knows how to parse ints within the range of a signed 32 bit int and |
| 11 // decimal numbers within a double. | 11 // decimal numbers within a double. |
| 12 // - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 | 12 // - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 |
| 13 // (BE or LE) and UTF-32 (BE or LE) as well. | 13 // (BE or LE) and UTF-32 (BE or LE) as well. |
| 14 // - We limit nesting to 100 levels to prevent stack overflow (this is allowed | 14 // - We limit nesting to 100 levels to prevent stack overflow (this is allowed |
| 15 // by the RFC). | 15 // by the RFC). |
| 16 // - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data | 16 // - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data |
| 17 // stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input | 17 // stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input |
| 18 // UTF-8 string for the JSONReader::JsonToValue() function may start with a | 18 // UTF-8 string for the JSONReader::JsonToValue() function may start with a |
| 19 // UTF-8 BOM (0xEF, 0xBB, 0xBF). | 19 // UTF-8 BOM (0xEF, 0xBB, 0xBF). |
| 20 // To keep the function from mistreating a UTF-8 BOM as an invalid | 20 // To keep the function from mistreating a UTF-8 BOM as an invalid |
| 21 // character, the function skips a Unicode BOM at the beginning of the | 21 // character, the function skips a Unicode BOM at the beginning of the |
| 22 // Unicode string (converted from the input UTF-8 string) before parsing it. | 22 // Unicode string (converted from the input UTF-8 string) before parsing it. |
| 23 // | 23 // |
| 24 // TODO(tc): It would be nice to give back an error string when we fail to | |
| 25 // parse JSON. | |
| 26 // TODO(tc): Add a parsing option to relax object keys being wrapped in | 24 // TODO(tc): Add a parsing option to relax object keys being wrapped in |
| 27 // double quotes | 25 // double quotes |
| 28 // TODO(tc): Add an option to disable comment stripping | 26 // TODO(tc): Add an option to disable comment stripping |
| 27 // TODO(aa): Consider making the constructor public and the static Read() method | |
| 28 // only a convenience for the common uses with more complex configuration going | |
| 29 // on the instance. | |
| 29 | 30 |
| 30 #ifndef BASE_JSON_READER_H_ | 31 #ifndef BASE_JSON_READER_H_ |
| 31 #define BASE_JSON_READER_H_ | 32 #define BASE_JSON_READER_H_ |
| 32 | 33 |
| 33 #include <string> | 34 #include <string> |
| 34 | 35 |
| 35 #include "base/basictypes.h" | 36 #include "base/basictypes.h" |
| 36 #include "testing/gtest/include/gtest/gtest_prod.h" | 37 #include "testing/gtest/include/gtest/gtest_prod.h" |
| 37 | 38 |
| 38 class Value; | 39 class Value; |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 67 | 68 |
| 68 // End should be one char past the end of the token. | 69 // End should be one char past the end of the token. |
| 69 int length; | 70 int length; |
| 70 | 71 |
| 71 // Get the character that's one past the end of this token. | 72 // Get the character that's one past the end of this token. |
| 72 wchar_t NextChar() { | 73 wchar_t NextChar() { |
| 73 return *(begin + length); | 74 return *(begin + length); |
| 74 } | 75 } |
| 75 }; | 76 }; |
| 76 | 77 |
| 77 // Reads and parses |json| and populates |root|. If |json| is not a properly | 78 // Error messages that can be returned. |
| 78 // formed JSON string, returns false and leaves root unaltered. If | 79 static const char* kBadRootElementType; |
| 79 // allow_trailing_comma is true, we will ignore trailing commas in objects | 80 static const char* kInvalidEscape; |
| 80 // and arrays even though this goes against the RFC. | 81 static const char* kSyntaxError; |
| 82 static const char* kTrailingComma; | |
| 83 static const char* kTooMuchNesting; | |
| 84 static const char* kUnexpectedDataAfterRoot; | |
| 85 static const char* kUnsupportedEncoding; | |
| 86 static const char* kUnquotedDictionaryKey; | |
| 87 | |
| 88 // Reads and parses |json| and populates |root|. If |json| is not a properly | |
| 89 // formed JSON string, returns false and leaves root unaltered. Use | |
| 90 // ReadAndReturnError() to obtain an error message. If allow_trailing_comma is true, we will | |
| 91 // ignore trailing commas in objects and arrays even though this goes against | |
| 92 // the RFC. | |
| 81 static bool Read(const std::string& json, | 93 static bool Read(const std::string& json, |
| 82 Value** root, | 94 Value** root, |
| 83 bool allow_trailing_comma); | 95 bool allow_trailing_comma); |
| 84 | 96 |
| 97 // Reads and parses |json| like Read(). |error_message_out| is optional. If | |
| 98 // specified and false is returned, error_message_out will be populated with | |
| 99 // a string describing the error. Otherwise, error_message_out is unmodified. | |
| 100 static bool ReadAndReturnError(const std::string& json, | |
| 101 Value** root, | |
| 102 bool allow_trailing_comma, | |
| 103 std::string *error_message_out); | |
| 104 | |
| 85 private: | 105 private: |
| 86 JSONReader(const wchar_t* json_start_pos, bool allow_trailing_comma); | 106 static std::string FormatErrorMessage(int line, int column, |
| 107 const char* description); | |
| 108 | |
| 109 JSONReader(); | |
| 87 DISALLOW_EVIL_CONSTRUCTORS(JSONReader); | 110 DISALLOW_EVIL_CONSTRUCTORS(JSONReader); |
| 88 | 111 |
| 89 FRIEND_TEST(JSONReaderTest, Reading); | 112 FRIEND_TEST(JSONReaderTest, Reading); |
| 113 FRIEND_TEST(JSONReaderTest, ErrorMessages); | |
| 114 | |
| 115 // Returns the error message if the last call to JsonToValue() failed. If the | |
| 116 // last call did not fail, returns a valid empty string. | |
| 117 std::string* error_message() { return &error_message_; } | |
|
tony
2008/12/05 20:37:57
Nit: Can we just return a string here? I think th
| |
| 90 | 118 |
| 91 // Pass through method from JSONReader::Read. We have this so unittests can | 119 // Pass through method from JSONReader::Read. We have this so unittests can |
| 92 // disable the root check. | 120 // disable the root check. |
| 93 static bool JsonToValue(const std::string& json, Value** root, | 121 bool JsonToValue(const std::string& json, Value** root, bool check_root, |
| 94 bool check_root, | 122 bool allow_trailing_comma); |
| 95 bool allow_trailing_comma); | |
| 96 | 123 |
| 97 // Recursively build Value. Returns false if we don't have a valid JSON | 124 // Recursively build Value. Returns false if we don't have a valid JSON |
| 98 // string. If |is_root| is true, we verify that the root element is either | 125 // string. If |is_root| is true, we verify that the root element is either |
| 99 // an object or an array. | 126 // an object or an array. |
| 100 bool BuildValue(Value** root, bool is_root); | 127 bool BuildValue(Value** root, bool is_root); |
| 101 | 128 |
| 102 // Parses a sequence of characters into a Token::NUMBER. If the sequence of | 129 // Parses a sequence of characters into a Token::NUMBER. If the sequence of |
| 103 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note | 130 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note |
| 104 // that DecodeNumber is used to actually convert from a string to an | 131 // that DecodeNumber is used to actually convert from a string to an |
| 105 // int/double. | 132 // int/double. |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 128 // Increments json_pos_ past leading whitespace and comments. | 155 // Increments json_pos_ past leading whitespace and comments. |
| 129 void EatWhitespaceAndComments(); | 156 void EatWhitespaceAndComments(); |
| 130 | 157 |
| 131 // If json_pos_ is at the start of a comment, eat it, otherwise, returns | 158 // If json_pos_ is at the start of a comment, eat it, otherwise, returns |
| 132 // false. | 159 // false. |
| 133 bool EatComment(); | 160 bool EatComment(); |
| 134 | 161 |
| 135 // Checks if json_pos_ matches str. | 162 // Checks if json_pos_ matches str. |
| 136 bool NextStringMatch(const std::wstring& str); | 163 bool NextStringMatch(const std::wstring& str); |
| 137 | 164 |
| 165 // Creates the error message that will be returned to the caller. The current | |
| 166 // line and column are determined and added into the final message. | |
| 167 void SetErrorMessage(const char* description, const wchar_t* error_pos); | |
| 168 | |
| 169 // Pointer to the starting position in the input string. | |
| 170 const wchar_t* start_pos_; | |
| 171 | |
| 138 // Pointer to the current position in the input string. | 172 // Pointer to the current position in the input string. |
| 139 const wchar_t* json_pos_; | 173 const wchar_t* json_pos_; |
| 140 | 174 |
| 141 // Used to keep track of how many nested lists/dicts there are. | 175 // Used to keep track of how many nested lists/dicts there are. |
| 142 int stack_depth_; | 176 int stack_depth_; |
| 143 | 177 |
| 144 // A parser flag that allows trailing commas in objects and arrays. | 178 // A parser flag that allows trailing commas in objects and arrays. |
| 145 bool allow_trailing_comma_; | 179 bool allow_trailing_comma_; |
| 180 | |
| 181 // Contains the error message for the last call to JsonToValue(), if any. | |
| 182 std::string error_message_; | |
| 146 }; | 183 }; |
| 147 | 184 |
| 148 #endif // BASE_JSON_READER_H_ | 185 #endif // BASE_JSON_READER_H_ |
| OLD | NEW |