OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // A JSON parser. Converts strings of JSON into a Value object (see | 5 // A JSON parser. Converts strings of JSON into a Value object (see |
6 // base/values.h). | 6 // base/values.h). |
7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 | 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 |
8 // | 8 // |
9 // Known limitations/deviations from the RFC: | 9 // Known limitations/deviations from the RFC: |
10 // - Only knows how to parse ints within the range of a signed 32 bit int and | 10 // - Only knows how to parse ints within the range of a signed 32 bit int and |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
64 NUMBER, | 64 NUMBER, |
65 BOOL_TRUE, // true | 65 BOOL_TRUE, // true |
66 BOOL_FALSE, // false | 66 BOOL_FALSE, // false |
67 NULL_TOKEN, // null | 67 NULL_TOKEN, // null |
68 LIST_SEPARATOR, // , | 68 LIST_SEPARATOR, // , |
69 OBJECT_PAIR_SEPARATOR, // : | 69 OBJECT_PAIR_SEPARATOR, // : |
70 END_OF_INPUT, | 70 END_OF_INPUT, |
71 INVALID_TOKEN, | 71 INVALID_TOKEN, |
72 }; | 72 }; |
73 | 73 |
74 Token(Type t, const wchar_t* b, int len) | 74 Token(Type t, const char* b, int len) |
75 : type(t), begin(b), length(len) {} | 75 : type(t), begin(b), length(len) {} |
76 | 76 |
77 // Get the character that's one past the end of this token. | 77 // Get the character that's one past the end of this token. |
78 wchar_t NextChar() { | 78 char NextChar() { |
79 return *(begin + length); | 79 return *(begin + length); |
80 } | 80 } |
81 | 81 |
82 static Token CreateInvalidToken() { | 82 static Token CreateInvalidToken() { |
83 return Token(INVALID_TOKEN, 0, 0); | 83 return Token(INVALID_TOKEN, 0, 0); |
84 } | 84 } |
85 | 85 |
86 Type type; | 86 Type type; |
87 | 87 |
88 // A pointer into JSONReader::json_pos_ that's the beginning of this token. | 88 // A pointer into JSONReader::json_pos_ that's the beginning of this token. |
89 const wchar_t* begin; | 89 const char* begin; |
90 | 90 |
91 // End should be one char past the end of the token. | 91 // End should be one char past the end of the token. |
92 int length; | 92 int length; |
93 }; | 93 }; |
94 | 94 |
95 // Error codes during parsing. | 95 // Error codes during parsing. |
96 enum JsonParseError { | 96 enum JsonParseError { |
97 JSON_NO_ERROR = 0, | 97 JSON_NO_ERROR = 0, |
98 JSON_BAD_ROOT_ELEMENT_TYPE, | 98 JSON_BAD_ROOT_ELEMENT_TYPE, |
99 JSON_INVALID_ESCAPE, | 99 JSON_INVALID_ESCAPE, |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
179 // Parses a sequence of characters into a Token::STRING. If the sequence of | 179 // Parses a sequence of characters into a Token::STRING. If the sequence of |
180 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note | 180 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note |
181 // that DecodeString is used to actually decode the escaped string into an | 181 // that DecodeString is used to actually decode the escaped string into an |
182 // actual wstring. | 182 // actual wstring. |
183 Token ParseStringToken(); | 183 Token ParseStringToken(); |
184 | 184 |
185 // Convert the substring into a value string. This should always succeed | 185 // Convert the substring into a value string. This should always succeed |
186 // (otherwise ParseStringToken would have failed). | 186 // (otherwise ParseStringToken would have failed). |
187 Value* DecodeString(const Token& token); | 187 Value* DecodeString(const Token& token); |
188 | 188 |
| 189 // Helper function for DecodeString that consumes UTF16 [0,2] code units and |
| 190 // converts them to UTF8 code units. |token| is the string token in which the |
| 191 // units should be read, |i| is the position in the token at which the first |
| 192 // code unit starts, immediately after the |\u|. This will be mutated if code |
| 193 // units are consumed. |dest_string| is a string to which the UTF8 code unit |
| 194 // should be appended. Returns true on success and false if there's an |
| 195 // encoding error. |
| 196 bool ConvertUTF16Units(const Token& token, |
| 197 int* i, |
| 198 std::string* dest_string); |
| 199 |
189 // Grabs the next token in the JSON stream. This does not increment the | 200 // Grabs the next token in the JSON stream. This does not increment the |
190 // stream so it can be used to look ahead at the next token. | 201 // stream so it can be used to look ahead at the next token. |
191 Token ParseToken(); | 202 Token ParseToken(); |
192 | 203 |
193 // Increments |json_pos_| past leading whitespace and comments. | 204 // Increments |json_pos_| past leading whitespace and comments. |
194 void EatWhitespaceAndComments(); | 205 void EatWhitespaceAndComments(); |
195 | 206 |
196 // If |json_pos_| is at the start of a comment, eat it, otherwise, returns | 207 // If |json_pos_| is at the start of a comment, eat it, otherwise, returns |
197 // false. | 208 // false. |
198 bool EatComment(); | 209 bool EatComment(); |
199 | 210 |
200 // Checks if |json_pos_| matches str. | 211 // Checks if |json_pos_| matches str. |
201 bool NextStringMatch(const wchar_t* str, size_t length); | 212 bool NextStringMatch(const char* str, size_t length); |
202 | 213 |
203 // Sets the error code that will be returned to the caller. The current | 214 // Sets the error code that will be returned to the caller. The current |
204 // line and column are determined and added into the final message. | 215 // line and column are determined and added into the final message. |
205 void SetErrorCode(const JsonParseError error, const wchar_t* error_pos); | 216 void SetErrorCode(const JsonParseError error, const char* error_pos); |
206 | 217 |
207 // Pointer to the starting position in the input string. | 218 // Pointer to the starting position in the input string. |
208 const wchar_t* start_pos_; | 219 const char* start_pos_; |
209 | 220 |
210 // Pointer to the current position in the input string. | 221 // Pointer to the current position in the input string. |
211 const wchar_t* json_pos_; | 222 const char* json_pos_; |
| 223 |
| 224 // Pointer to the last position in the input string. |
| 225 const char* end_pos_; |
212 | 226 |
213 // Used to keep track of how many nested lists/dicts there are. | 227 // Used to keep track of how many nested lists/dicts there are. |
214 int stack_depth_; | 228 int stack_depth_; |
215 | 229 |
216 // A parser flag that allows trailing commas in objects and arrays. | 230 // A parser flag that allows trailing commas in objects and arrays. |
217 bool allow_trailing_comma_; | 231 bool allow_trailing_comma_; |
218 | 232 |
219 // Contains the error code for the last call to JsonToValue(), if any. | 233 // Contains the error code for the last call to JsonToValue(), if any. |
220 JsonParseError error_code_; | 234 JsonParseError error_code_; |
221 int error_line_; | 235 int error_line_; |
222 int error_col_; | 236 int error_col_; |
223 | 237 |
224 DISALLOW_COPY_AND_ASSIGN(JSONReader); | 238 DISALLOW_COPY_AND_ASSIGN(JSONReader); |
225 }; | 239 }; |
226 | 240 |
227 } // namespace base | 241 } // namespace base |
228 | 242 |
229 #endif // BASE_JSON_JSON_READER_H_ | 243 #endif // BASE_JSON_JSON_READER_H_ |
OLD | NEW |