| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 // | |
| 5 // A JSON parser. Converts strings of JSON into a Value object (see | |
| 6 // base/values.h). | |
| 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 | |
| 8 // | |
| 9 // Known limitations/deviations from the RFC: | |
| 10 // - Only knows how to parse ints within the range of a signed 32 bit int and | |
| 11 // decimal numbers within a double. | |
| 12 // - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 | |
| 13 // (BE or LE) and UTF-32 (BE or LE) as well. | |
| 14 // - We limit nesting to 100 levels to prevent stack overflow (this is allowed | |
| 15 // by the RFC). | |
| 16 // - The Unicode FAQ ("http://unicode.org/faq/utf_bom.html") says that a data | |
| 17 // stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input | |
| 18 // UTF-8 string for the JSONReader::JsonToValue() function may start with a | |
| 19 // UTF-8 BOM (0xEF, 0xBB, 0xBF). | |
| 20 // To keep the function from mistaking a UTF-8 BOM for an invalid | |
| 21 // character, the function skips a Unicode BOM at the beginning of the | |
| 22 // Unicode string (converted from the input UTF-8 string) before parsing it. | |
| 23 // | |
| 24 // TODO(tc): Add a parsing option to relax object keys being wrapped in | |
| 25 // double quotes | |
| 26 // TODO(tc): Add an option to disable comment stripping | |
| 27 // TODO(aa): Consider making the constructor public and the static Read() method | |
| 28 // only a convenience for the common uses with more complex configuration going | |
| 29 // on the instance. | |
| 30 | |
| 31 #ifndef BASE_JSON_READER_H_ | |
| 32 #define BASE_JSON_READER_H_ | |
| 33 | |
| 34 #include <string> | |
| 35 | |
| 36 #include "base/basictypes.h" | |
| 37 #include "testing/gtest/include/gtest/gtest_prod.h" | |
| 38 | |
| 39 class Value; | |
| 40 | |
| 41 class JSONReader { | |
| 42 public: | |
| 43 // A struct to hold a JS token. | |
| 44 class Token { | |
| 45 public: | |
| 46 enum Type { | |
| 47 OBJECT_BEGIN, // { | |
| 48 OBJECT_END, // } | |
| 49 ARRAY_BEGIN, // [ | |
| 50 ARRAY_END, // ] | |
| 51 STRING, | |
| 52 NUMBER, | |
| 53 BOOL_TRUE, // true | |
| 54 BOOL_FALSE, // false | |
| 55 NULL_TOKEN, // null | |
| 56 LIST_SEPARATOR, // , | |
| 57 OBJECT_PAIR_SEPARATOR, // : | |
| 58 END_OF_INPUT, | |
| 59 INVALID_TOKEN, | |
| 60 }; | |
| 61 Token(Type t, const wchar_t* b, int len) | |
| 62 : type(t), begin(b), length(len) {} | |
| 63 | |
| 64 Type type; | |
| 65 | |
| 66 // A pointer into JSONReader::json_pos_ that's the beginning of this token. | |
| 67 const wchar_t* begin; | |
| 68 | |
| 69 // End should be one char past the end of the token. | |
| 70 int length; | |
| 71 | |
| 72 // Get the character that's one past the end of this token. | |
| 73 wchar_t NextChar() { | |
| 74 return *(begin + length); | |
| 75 } | |
| 76 }; | |
| 77 | |
| 78 // Error messages that can be returned. | |
| 79 static const char* kBadRootElementType; | |
| 80 static const char* kInvalidEscape; | |
| 81 static const char* kSyntaxError; | |
| 82 static const char* kTrailingComma; | |
| 83 static const char* kTooMuchNesting; | |
| 84 static const char* kUnexpectedDataAfterRoot; | |
| 85 static const char* kUnsupportedEncoding; | |
| 86 static const char* kUnquotedDictionaryKey; | |
| 87 | |
| 88 JSONReader(); | |
| 89 | |
| 90 // Reads and parses |json|, returning a Value. The caller owns the returned | |
| 91 // instance. If |json| is not a properly formed JSON string, returns NULL. | |
| 92 // If |allow_trailing_comma| is true, we will ignore trailing commas in | |
| 93 // objects and arrays even though this goes against the RFC. | |
| 94 static Value* Read(const std::string& json, bool allow_trailing_comma); | |
| 95 | |
| 96 // Reads and parses |json| like Read(). |error_message_out| is optional. If | |
| 97 // specified and NULL is returned, |error_message_out| will be populated with | |
| 98 // a string describing the error. Otherwise, |error_message_out| is | |
| 99 // unmodified. | |
| 100 static Value* ReadAndReturnError(const std::string& json, | |
| 101 bool allow_trailing_comma, | |
| 102 std::string* error_message_out); | |
| 103 | |
| 104 // Returns the error message if the last call to JsonToValue() failed. If the | |
| 105 // last call did not fail, returns a valid empty string. | |
| 106 std::string error_message() { return error_message_; } | |
| 107 | |
| 108 // Reads and parses |json|, returning a Value. The caller owns the returned | |
| 109 // instance. If |json| is not a properly formed JSON string, returns NULL and | |
| 110 // a detailed error can be retrieved from |error_message()|. | |
| 111 // If |check_root| is true, we require that the root object be an object or | |
| 112 // array. Otherwise, it can be any valid JSON type. | |
| 113 // If |allow_trailing_comma| is true, we will ignore trailing commas in | |
| 114 // objects and arrays even though this goes against the RFC. | |
| 115 Value* JsonToValue(const std::string& json, bool check_root, | |
| 116 bool allow_trailing_comma); | |
| 117 | |
| 118 private: | |
| 119 static std::string FormatErrorMessage(int line, int column, | |
| 120 const char* description); | |
| 121 | |
| 122 DISALLOW_EVIL_CONSTRUCTORS(JSONReader); | |
| 123 | |
| 124 FRIEND_TEST(JSONReaderTest, Reading); | |
| 125 FRIEND_TEST(JSONReaderTest, ErrorMessages); | |
| 126 | |
| 127 // Recursively build Value. Returns NULL if we don't have a valid JSON | |
| 128 // string. If |is_root| is true, we verify that the root element is either | |
| 129 // an object or an array. | |
| 130 Value* BuildValue(bool is_root); | |
| 131 | |
| 132 // Parses a sequence of characters into a Token::NUMBER. If the sequence of | |
| 133 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note | |
| 134 // that DecodeNumber is used to actually convert from a string to an | |
| 135 // int/double. | |
| 136 Token ParseNumberToken(); | |
| 137 | |
| 138 // Try and convert the substring that token holds into an int or a double. If | |
| 139 // we can (ie., no overflow), return the value, else return NULL. | |
| 140 Value* DecodeNumber(const Token& token); | |
| 141 | |
| 142 // Parses a sequence of characters into a Token::STRING. If the sequence of | |
| 143 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note | |
| 144 // that DecodeString is used to actually decode the escaped string into an | |
| 145 // actual wstring. | |
| 146 Token ParseStringToken(); | |
| 147 | |
| 148 // Convert the substring into a value string. This should always succeed | |
| 149 // (otherwise ParseStringToken would have failed). | |
| 150 Value* DecodeString(const Token& token); | |
| 151 | |
| 152 // Grabs the next token in the JSON stream. This does not increment the | |
| 153 // stream so it can be used to look ahead at the next token. | |
| 154 Token ParseToken(); | |
| 155 | |
| 156 // Increments |json_pos_| past leading whitespace and comments. | |
| 157 void EatWhitespaceAndComments(); | |
| 158 | |
| 159 // If |json_pos_| is at the start of a comment, eat it, otherwise, returns | |
| 160 // false. | |
| 161 bool EatComment(); | |
| 162 | |
| 163 // Checks if |json_pos_| matches str. | |
| 164 bool NextStringMatch(const std::wstring& str); | |
| 165 | |
| 166 // Creates the error message that will be returned to the caller. The current | |
| 167 // line and column are determined and added into the final message. | |
| 168 void SetErrorMessage(const char* description, const wchar_t* error_pos); | |
| 169 | |
| 170 // Pointer to the starting position in the input string. | |
| 171 const wchar_t* start_pos_; | |
| 172 | |
| 173 // Pointer to the current position in the input string. | |
| 174 const wchar_t* json_pos_; | |
| 175 | |
| 176 // Used to keep track of how many nested lists/dicts there are. | |
| 177 int stack_depth_; | |
| 178 | |
| 179 // A parser flag that allows trailing commas in objects and arrays. | |
| 180 bool allow_trailing_comma_; | |
| 181 | |
| 182 // Contains the error message for the last call to JsonToValue(), if any. | |
| 183 std::string error_message_; | |
| 184 }; | |
| 185 | |
| 186 #endif // BASE_JSON_READER_H_ | |
| OLD | NEW |