OLD | NEW |
---|---|
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // A JSON parser. Converts strings of JSON into a Value object (see | 5 // A JSON parser. Converts strings of JSON into a Value object (see |
6 // base/values.h). | 6 // base/values.h). |
7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 | 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 |
8 // | 8 // |
9 // Known limitations/deviations from the RFC: | 9 // Known limitations/deviations from the RFC: |
10 // - Only knows how to parse ints within the range of a signed 32 bit int and | 10 // - Only knows how to parse ints within the range of a signed 32 bit int and |
11 // decimal numbers within a double. | 11 // decimal numbers within a double. |
12 // - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 | 12 // - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 |
13 // (BE or LE) and UTF-32 (BE or LE) as well. | 13 // (BE or LE) and UTF-32 (BE or LE) as well. |
14 // - We limit nesting to 100 levels to prevent stack overflow (this is allowed | 14 // - We limit nesting to 100 levels to prevent stack overflow (this is allowed |
15 // by the RFC). | 15 // by the RFC). |
16 // - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data | 16 // - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data |
17 // stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input | 17 // stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input |
18 // UTF-8 string for the JSONReader::JsonToValue() function may start with a | 18 // UTF-8 string for the JSONReader::JsonToValue() function may start with a |
19 // UTF-8 BOM (0xEF, 0xBB, 0xBF). | 19 // UTF-8 BOM (0xEF, 0xBB, 0xBF). |
20 // To prevent the function from mistaking a UTF-8 BOM for an invalid | 20 // To prevent the function from mistaking a UTF-8 BOM for an invalid |
21 // character, the function skips a Unicode BOM at the beginning of the | 21 // character, the function skips a Unicode BOM at the beginning of the |
22 // Unicode string (converted from the input UTF-8 string) before parsing it. | 22 // Unicode string (converted from the input UTF-8 string) before parsing it. |
23 // | 23 // |
24 // TODO(tc): It would be nice to give back an error string when we fail to | |
25 // parse JSON. | |
26 // TODO(tc): Add a parsing option to relax object keys being wrapped in | 24 // TODO(tc): Add a parsing option to relax object keys being wrapped in |
27 // double quotes | 25 // double quotes |
28 // TODO(tc): Add an option to disable comment stripping | 26 // TODO(tc): Add an option to disable comment stripping |
27 // TODO(aa): Consider making the constructor public and the static Read() method | |
28 // only a convenience for the common uses with more complex configuration going | |
29 // on the instance. | |
29 | 30 |
30 #ifndef BASE_JSON_READER_H_ | 31 #ifndef BASE_JSON_READER_H_ |
31 #define BASE_JSON_READER_H_ | 32 #define BASE_JSON_READER_H_ |
32 | 33 |
33 #include <string> | 34 #include <string> |
34 | 35 |
35 #include "base/basictypes.h" | 36 #include "base/basictypes.h" |
36 #include "testing/gtest/include/gtest/gtest_prod.h" | 37 #include "testing/gtest/include/gtest/gtest_prod.h" |
37 | 38 |
38 class Value; | 39 class Value; |
(...skipping 28 matching lines...) Expand all Loading... | |
67 | 68 |
68 // End should be one char past the end of the token. | 69 // End should be one char past the end of the token. |
69 int length; | 70 int length; |
70 | 71 |
71 // Get the character that's one past the end of this token. | 72 // Get the character that's one past the end of this token. |
72 wchar_t NextChar() { | 73 wchar_t NextChar() { |
73 return *(begin + length); | 74 return *(begin + length); |
74 } | 75 } |
75 }; | 76 }; |
76 | 77 |
77 // Reads and parses |json| and populates |root|. If |json| is not a properly | 78 // Error messages that can be returned. |
78 // formed JSON string, returns false and leaves root unaltered. If | 79 static const char* kBadRootElementType; |
79 // allow_trailing_comma is true, we will ignore trailing commas in objects | 80 static const char* kInvalidEscape; |
80 // and arrays even though this goes against the RFC. | 81 static const char* kSyntaxError; |
82 static const char* kTrailingComma; | |
83 static const char* kTooMuchNesting; | |
84 static const char* kUnexpectedDataAfterRoot; | |
85 static const char* kUnsupportedEncoding; | |
86 static const char* kUnquotedDictionaryKey; | |
87 | |
88 // Reads and parses |json| and populates |root|. If |json| is not a properly | |
89 // formed JSON string, returns false and leaves root unaltered; use | |
90 // ReadAndReturnError() to also get an error message. If allow_trailing_comma is true, we will | |
91 // ignore trailing commas in objects and arrays even though this goes against | |
92 // the RFC. | |
81 static bool Read(const std::string& json, | 93 static bool Read(const std::string& json, |
82 Value** root, | 94 Value** root, |
83 bool allow_trailing_comma); | 95 bool allow_trailing_comma); |
84 | 96 |
97 // Reads and parses |json| like Read(). |error_message_out| is optional. If | |
98 // specified and false is returned, error_message_out will be populated with | |
99 // a string describing the error. Otherwise, error_message_out is unmodified. | |
100 static bool ReadAndReturnError(const std::string& json, | |
101 Value** root, | |
102 bool allow_trailing_comma, | |
103 std::string *error_message_out); | |
104 | |
85 private: | 105 private: |
86 JSONReader(const wchar_t* json_start_pos, bool allow_trailing_comma); | 106 static std::string FormatErrorMessage(int line, int column, |
107 const char* description); | |
108 | |
109 JSONReader(); | |
87 DISALLOW_EVIL_CONSTRUCTORS(JSONReader); | 110 DISALLOW_EVIL_CONSTRUCTORS(JSONReader); |
88 | 111 |
89 FRIEND_TEST(JSONReaderTest, Reading); | 112 FRIEND_TEST(JSONReaderTest, Reading); |
113 FRIEND_TEST(JSONReaderTest, ErrorMessages); | |
114 | |
115 // Returns the error message if the last call to JsonToValue() failed. If the | |
116 // last call did not fail, returns a valid empty string. | |
117 std::string* error_message() { return &error_message_; } | |
tony
2008/12/05 20:37:57
Nit: Can we just return a string here? I think th
| |
90 | 118 |
91 // Pass through method from JSONReader::Read. We have this so unittests can | 119 // Pass through method from JSONReader::Read. We have this so unittests can |
92 // disable the root check. | 120 // disable the root check. |
93 static bool JsonToValue(const std::string& json, Value** root, | 121 bool JsonToValue(const std::string& json, Value** root, bool check_root, |
94 bool check_root, | 122 bool allow_trailing_comma); |
95 bool allow_trailing_comma); | |
96 | 123 |
97 // Recursively build Value. Returns false if we don't have a valid JSON | 124 // Recursively build Value. Returns false if we don't have a valid JSON |
98 // string. If |is_root| is true, we verify that the root element is either | 125 // string. If |is_root| is true, we verify that the root element is either |
99 // an object or an array. | 126 // an object or an array. |
100 bool BuildValue(Value** root, bool is_root); | 127 bool BuildValue(Value** root, bool is_root); |
101 | 128 |
102 // Parses a sequence of characters into a Token::NUMBER. If the sequence of | 129 // Parses a sequence of characters into a Token::NUMBER. If the sequence of |
103 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note | 130 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note |
104 // that DecodeNumber is used to actually convert from a string to an | 131 // that DecodeNumber is used to actually convert from a string to an |
105 // int/double. | 132 // int/double. |
(...skipping 22 matching lines...) Expand all Loading... | |
128 // Increments json_pos_ past leading whitespace and comments. | 155 // Increments json_pos_ past leading whitespace and comments. |
129 void EatWhitespaceAndComments(); | 156 void EatWhitespaceAndComments(); |
130 | 157 |
131 // If json_pos_ is at the start of a comment, eat it, otherwise, returns | 158 // If json_pos_ is at the start of a comment, eat it, otherwise, returns |
132 // false. | 159 // false. |
133 bool EatComment(); | 160 bool EatComment(); |
134 | 161 |
135 // Checks if json_pos_ matches str. | 162 // Checks if json_pos_ matches str. |
136 bool NextStringMatch(const std::wstring& str); | 163 bool NextStringMatch(const std::wstring& str); |
137 | 164 |
165 // Creates the error message that will be returned to the caller. The current | |
166 // line and column are determined and added into the final message. | |
167 void SetErrorMessage(const char* description, const wchar_t* error_pos); | |
168 | |
169 // Pointer to the starting position in the input string. | |
170 const wchar_t* start_pos_; | |
171 | |
138 // Pointer to the current position in the input string. | 172 // Pointer to the current position in the input string. |
139 const wchar_t* json_pos_; | 173 const wchar_t* json_pos_; |
140 | 174 |
141 // Used to keep track of how many nested lists/dicts there are. | 175 // Used to keep track of how many nested lists/dicts there are. |
142 int stack_depth_; | 176 int stack_depth_; |
143 | 177 |
144 // A parser flag that allows trailing commas in objects and arrays. | 178 // A parser flag that allows trailing commas in objects and arrays. |
145 bool allow_trailing_comma_; | 179 bool allow_trailing_comma_; |
180 | |
181 // Contains the error message for the last call to JsonToValue(), if any. | |
182 std::string error_message_; | |
146 }; | 183 }; |
147 | 184 |
148 #endif // BASE_JSON_READER_H_ | 185 #endif // BASE_JSON_READER_H_ |
OLD | NEW |