OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "base/json/json_parser.h" | |
6 | |
7 #include "base/float_util.h" | |
8 #include "base/logging.h" | |
9 #include "base/memory/scoped_ptr.h" | |
10 #include "base/string_number_conversions.h" | |
11 #include "base/string_util.h" | |
12 #include "base/stringprintf.h" | |
13 #include "base/third_party/icu/icu_utf.h" | |
14 #include "base/utf_string_conversion_utils.h" | |
15 #include "base/utf_string_conversions.h" | |
16 #include "base/values.h" | |
17 | |
18 namespace base { | |
19 namespace internal { | |
20 | |
21 namespace { | |
22 | |
// Nesting limit for dictionaries/lists: StackMarker::IsTooDeep() reports true
// once the parser's depth counter reaches this value.
const int kStackMaxDepth = 100;

// Code points below this value are appended to a string as a single byte;
// code points at or above it are encoded as a multi-byte UTF-8 sequence.
const int32 kExtendedASCIIStart = 0x80;
26 | |
27 // This and the class below are used to own the JSON input string for when | |
28 // string tokens are stored as StringPiece instead of std::string. This | |
29 // optimization avoids about 2/3rds of string memory copies. The constructor | |
30 // takes the input string and swaps its data into the new instance. The real | |
31 // root value is also Swap()ed into the new instance. | |
32 class DictionaryHiddenRootValue : public base::DictionaryValue { | |
33 public: | |
34 DictionaryHiddenRootValue(std::string* json, Value* root) { | |
35 DCHECK(root->IsType(Value::TYPE_DICTIONARY)); | |
36 DictionaryValue::Swap(static_cast<DictionaryValue*>(root)); | |
37 json->swap(json_); | |
38 } | |
39 | |
40 virtual void Swap(DictionaryValue* other) OVERRIDE { | |
41 DLOG(1) << "Swap()ing a DictionaryValue inefficiently."; | |
Mark Mentovai
2012/05/08 20:19:41
DLOG(1) is DLOG(WARNING). You either meant that or
Robert Sesek
2012/05/15 16:57:51
I've said it before, but it bears repeating: we ha
| |
42 | |
43 // First deep copy to convert JSONStringValue to std::string and swap that | |
44 // copy with |other|, which contains the new contents of |this|. | |
45 scoped_ptr<base::DictionaryValue> copy(DeepCopy()); | |
46 copy->Swap(other); | |
47 | |
48 // Then erase the contents of the current dictionary and swap in the | |
49 // new contents, originally from |other|. | |
50 Clear(); | |
51 json_.clear(); | |
52 DictionaryValue::Swap(copy.get()); | |
53 } | |
54 | |
55 // Not overriding DictionaryValue::Remove because it just calls through to | |
56 // the method below. | |
57 | |
58 virtual bool RemoveWithoutPathExpansion(const std::string& key, | |
59 Value** out) OVERRIDE { | |
60 // If the caller won't take ownership of the removed value, just call up. | |
61 if (!out) | |
62 return DictionaryValue::RemoveWithoutPathExpansion(key, out); | |
63 | |
64 DLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; | |
65 | |
66 // Otherwise, remove the value while its still "owned" by this and copy it | |
67 // to convert any JSONStringValues to std::string. | |
68 Value* out_owned = NULL; | |
69 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned)) | |
70 return false; | |
71 | |
72 *out = out_owned->DeepCopy(); | |
73 delete out_owned; | |
74 | |
75 return true; | |
76 } | |
77 | |
78 private: | |
79 std::string json_; | |
80 | |
81 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); | |
82 }; | |
83 | |
84 class ListHiddenRootValue : public base::ListValue { | |
85 public: | |
86 ListHiddenRootValue(std::string* json, Value* root) { | |
87 DCHECK(root->IsType(Value::TYPE_LIST)); | |
88 ListValue::Swap(static_cast<ListValue*>(root)); | |
89 json->swap(json_); | |
90 } | |
91 | |
92 virtual void Swap(ListValue* other) OVERRIDE { | |
93 DLOG(1) << "Swap()ing a ListValue inefficiently."; | |
94 | |
95 // First deep copy to convert JSONStringValue to std::string and swap that | |
96 // copy with |other|, which contains the new contents of |this|. | |
97 scoped_ptr<base::ListValue> copy(DeepCopy()); | |
98 copy->Swap(other); | |
99 | |
100 // Then erase the contents of the current list and swap in the new contents, | |
101 // originally from |other|. | |
102 Clear(); | |
103 json_.clear(); | |
104 ListValue::Swap(copy.get()); | |
105 } | |
106 | |
107 virtual bool Remove(size_t index, Value** out) OVERRIDE { | |
108 // If the caller won't take ownership of the removed value, just call up. | |
109 if (!out) | |
110 return ListValue::Remove(index, out); | |
111 | |
112 DLOG(1) << "Remove()ing from a ListValue inefficiently."; | |
113 | |
114 // Otherwise, remove the value while its still "owned" by this and copy it | |
115 // to convert any JSONStringValues to std::string. | |
116 Value* out_owned = NULL; | |
117 if (!ListValue::Remove(index, &out_owned)) | |
118 return false; | |
119 | |
120 *out = out_owned->DeepCopy(); | |
121 delete out_owned; | |
122 | |
123 return true; | |
124 } | |
125 | |
126 private: | |
127 std::string json_; | |
128 | |
129 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); | |
130 }; | |
131 | |
132 // A variant on StringValue that uses StringPiece instead of copying the string | |
133 // into the Value. This can only be stored in a child of hidden root (above), | |
134 // otherwise the referenced string will not be guaranteed to outlive it. | |
135 class JSONStringValue : public base::Value { | |
136 public: | |
137 explicit JSONStringValue(const base::StringPiece& piece) | |
138 : Value(TYPE_STRING), | |
139 string_piece_(piece) { | |
140 } | |
141 | |
142 // Value: | |
143 bool GetAsString(std::string* out_value) const OVERRIDE { | |
144 string_piece_.CopyToString(out_value); | |
145 return true; | |
146 } | |
147 bool GetAsString(string16* out_value) const OVERRIDE { | |
148 *out_value = UTF8ToUTF16(string_piece_); | |
149 return true; | |
150 } | |
151 virtual Value* DeepCopy() const OVERRIDE { | |
152 return Value::CreateStringValue(string_piece_.as_string()); | |
153 } | |
154 virtual bool Equals(const Value* other) const OVERRIDE { | |
155 std::string other_string; | |
156 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && | |
157 StringPiece(other_string) == string_piece_; | |
158 } | |
159 | |
160 private: | |
161 // The location in the original input stream. | |
162 base::StringPiece string_piece_; | |
163 | |
164 DISALLOW_COPY_AND_ASSIGN(JSONStringValue); | |
165 }; | |
166 | |
167 // Simple class that checks for maximum recursion/"stack overflow." | |
168 class StackMarker { | |
169 public: | |
170 explicit StackMarker(int* depth) : depth_(depth) { | |
171 ++(*depth_); | |
172 } | |
Mark Mentovai
2012/05/08 20:19:41
You should (D)CHECK here that depth <= kStackMaxDe
Robert Sesek
2012/05/15 16:57:51
Done.
| |
173 ~StackMarker() { | |
174 --(*depth_); | |
175 } | |
176 | |
177 bool IsTooDeep() const { | |
178 return *depth_ >= kStackMaxDepth; | |
179 } | |
180 | |
181 private: | |
182 int* const depth_; | |
183 | |
184 DISALLOW_COPY_AND_ASSIGN(StackMarker); | |
185 }; | |
186 | |
187 } // namespace | |
188 | |
189 JSONParser::JSONParser(int options) | |
190 : options_(options), | |
191 start_pos_(NULL), | |
192 pos_(0), | |
tfarina
2012/05/04 00:25:28
nit: just curious why did you choose 0 to initiali
Robert Sesek
2012/05/15 16:57:51
Done.
| |
193 end_pos_(0), | |
194 index_(0), | |
195 stack_depth_(0), | |
196 line_number_(0), | |
197 index_last_line_(0), | |
198 error_code_(JSONReader::JSON_NO_ERROR), | |
199 error_line_(0), | |
200 error_column_(0) { | |
201 } | |
202 | |
203 JSONParser::~JSONParser() { | |
204 } | |
205 | |
206 Value* JSONParser::Parse(const std::string& input) { | |
207 // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix | |
208 // <http://crbug.com/126107> when my Windows box arrives. | |
209 #if defined(OS_WIN) | |
210 options_ |= JSON_DETACHABLE_CHILDREN; | |
211 #endif | |
212 | |
213 std::string input_copy; | |
214 // If the children of a JSON root can be detached, then hidden roots cannot | |
215 // be used, so do not bother copying the input because StringPiece will not | |
216 // be used anywhere. | |
217 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { | |
218 input_copy = input; | |
219 start_pos_ = input_copy.data(); | |
220 } else { | |
221 start_pos_ = input.data(); | |
222 } | |
223 pos_ = start_pos_; | |
224 end_pos_ = start_pos_ + input.length(); | |
225 index_ = 0; | |
226 line_number_ = 1; | |
227 index_last_line_ = 0; | |
228 | |
229 error_code_ = JSONReader::JSON_NO_ERROR; | |
230 error_line_ = 0; | |
231 error_column_ = 0; | |
232 | |
233 // When the input JSON string starts with a UTF-8 Byte-Order-Mark | |
234 // <0xEF 0xBB 0xBF>, advance the start position to avoid the | |
235 // ParseNextToken function mis-treating a Unicode BOM as an invalid | |
236 // character and returning NULL. | |
237 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF && | |
238 static_cast<uint8>(*(pos_ + 1)) == 0xBB && | |
239 static_cast<uint8>(*(pos_ + 2)) == 0xBF) { | |
240 NextNChars(3); | |
241 } | |
242 | |
243 // Parse the first and all subsequent tokens. | |
244 scoped_ptr<Value> root(ParseNextToken()); | |
245 if (!root.get()) | |
246 return NULL; | |
247 | |
248 // Make sure the input stream is at an end. | |
249 if (GetNextToken() != T_END_OF_INPUT) { | |
250 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { | |
251 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); | |
252 return NULL; | |
253 } | |
254 } | |
255 | |
256 // Dictionaries and lists can contain JSONStringValues, so wrap them in a | |
257 // hidden root. | |
258 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { | |
259 if (root->IsType(Value::TYPE_DICTIONARY)) { | |
260 return new DictionaryHiddenRootValue(&input_copy, root.release()); | |
261 } else if (root->IsType(Value::TYPE_LIST)) { | |
262 return new ListHiddenRootValue(&input_copy, root.release()); | |
263 } else if (root->IsType(Value::TYPE_STRING)) { | |
264 // A string type could be a JSONStringValue, but because there's no | |
265 // corresponding HiddenRootValue, the memory will be lost. Deep copy to | |
266 // preserve it. | |
267 return root->DeepCopy(); | |
268 } | |
269 } | |
270 | |
271 // All other values can be returned directly. | |
272 return root.release(); | |
273 } | |
274 | |
// Returns the stored error code. Parse() resets it to JSON_NO_ERROR at the
// start of each call and ReportError() overwrites it.
JSONReader::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
278 | |
279 std::string JSONParser::GetErrorMessage() const { | |
280 return FormatErrorMessage(error_line_, error_column_, | |
281 JSONReader::ErrorCodeToString(error_code_)); | |
282 } | |
283 | |
284 // StringBuilder /////////////////////////////////////////////////////////////// | |
285 | |
286 JSONParser::StringBuilder::StringBuilder() | |
287 : pos_(NULL), | |
288 length_(0), | |
289 string_(NULL) { | |
290 } | |
291 | |
292 JSONParser::StringBuilder::StringBuilder(const char* pos) | |
293 : pos_(pos), | |
294 length_(0), | |
295 string_(NULL) { | |
296 } | |
297 | |
298 void JSONParser::StringBuilder::Swap(StringBuilder* other) { | |
299 std::swap(other->string_, string_); | |
300 std::swap(other->pos_, pos_); | |
301 std::swap(other->length_, length_); | |
302 } | |
303 | |
304 JSONParser::StringBuilder::~StringBuilder() { | |
305 delete string_; | |
306 } | |
307 | |
308 void JSONParser::StringBuilder::Append(const char& c) { | |
309 DCHECK_GE(c, 0); | |
Mark Mentovai
2012/05/08 20:19:41
Because of the stupid nature of char, you should b
Robert Sesek
2012/05/15 16:57:51
How would you do this?
| |
310 DCHECK_LT(c, 128); | |
311 | |
312 if (string_) | |
313 string_->push_back(c); | |
314 else | |
315 ++length_; | |
316 } | |
317 | |
318 void JSONParser::StringBuilder::AppendString(const std::string& str) { | |
319 DCHECK(string_); | |
320 string_->append(str); | |
321 } | |
322 | |
323 void JSONParser::StringBuilder::Convert() { | |
324 if (string_) | |
325 return; | |
326 string_ = new std::string(pos_, length_); | |
327 } | |
328 | |
329 bool JSONParser::StringBuilder::CanBeStringPiece() const { | |
330 return !string_; | |
331 } | |
332 | |
333 StringPiece JSONParser::StringBuilder::AsStringPiece() { | |
334 if (string_) | |
335 return StringPiece(); | |
336 return StringPiece(pos_, length_); | |
337 } | |
338 | |
339 const std::string& JSONParser::StringBuilder::AsString() { | |
340 if (!string_) | |
341 Convert(); | |
342 return *string_; | |
343 } | |
344 | |
345 // JSONParser private ////////////////////////////////////////////////////////// | |
346 | |
347 inline bool JSONParser::CanConsume(int length) { | |
348 return pos_ + length <= end_pos_; | |
349 } | |
350 | |
351 const char* JSONParser::NextChar() { | |
352 DCHECK(CanConsume(1)); | |
353 ++index_; | |
354 ++pos_; | |
355 return pos_; | |
356 } | |
357 | |
358 void JSONParser::NextNChars(int n) { | |
359 DCHECK(CanConsume(n)); | |
360 index_ += n; | |
361 pos_ += n; | |
362 } | |
363 | |
364 JSONParser::Token JSONParser::GetNextToken() { | |
365 EatWhitespaceAndComments(); | |
366 if (!CanConsume(1)) | |
367 return T_END_OF_INPUT; | |
368 | |
369 switch (*pos_) { | |
370 case '{': | |
371 return T_OBJECT_BEGIN; | |
372 case '}': | |
373 return T_OBJECT_END; | |
374 case '[': | |
375 return T_ARRAY_BEGIN; | |
376 case ']': | |
377 return T_ARRAY_END; | |
378 case '"': | |
379 return T_STRING; | |
380 case '0': | |
381 case '1': | |
382 case '2': | |
383 case '3': | |
384 case '4': | |
385 case '5': | |
386 case '6': | |
387 case '7': | |
388 case '8': | |
389 case '9': | |
390 case '-': | |
391 return T_NUMBER; | |
392 case 't': | |
393 return T_BOOL_TRUE; | |
394 case 'f': | |
395 return T_BOOL_FALSE; | |
396 case 'n': | |
397 return T_NULL; | |
398 case ',': | |
399 return T_LIST_SEPARATOR; | |
400 case ':': | |
401 return T_OBJECT_PAIR_SEPARATOR; | |
402 default: | |
403 return T_INVALID_TOKEN; | |
404 } | |
405 } | |
406 | |
407 void JSONParser::EatWhitespaceAndComments() { | |
408 while (pos_ < end_pos_) { | |
409 switch (*pos_) { | |
410 case '\r': | |
411 case '\n': | |
412 index_last_line_ = index_; | |
413 ++line_number_; | |
414 // Fall through. | |
415 case ' ': | |
416 case '\t': | |
417 NextChar(); | |
418 break; | |
419 case '/': | |
420 if (!EatComment()) | |
421 return; | |
422 break; | |
423 default: | |
424 return; | |
425 } | |
426 } | |
427 } | |
428 | |
429 bool JSONParser::EatComment() { | |
430 if (*pos_ != '/' || !CanConsume(1)) | |
431 return false; | |
432 | |
433 char next_char = *NextChar(); | |
434 if (next_char == '/') { | |
435 // Single line comment, read to newline. | |
436 while (CanConsume(1)) { | |
437 char next_char = *NextChar(); | |
438 if (next_char == '\n' || next_char == '\r') | |
439 return true; | |
440 } | |
441 } else if (next_char == '*') { | |
442 // Block comment, read until end marker. | |
443 while (CanConsume(2)) { | |
444 if (*NextChar() == '*' && *NextChar() == '/') { | |
Mark Mentovai
2012/05/08 20:19:41
This eats two characters at a time in a loop, so t
Robert Sesek
2012/05/15 16:57:51
Isn't that what's happening? The operator there is
| |
445 // EatWhitespaceAndComments will inspect pos_, which will still be on | |
446 // the last / of the comment, so advance once more (which may also be | |
447 // end of input). | |
448 NextChar(); | |
449 return true; | |
450 } | |
451 } | |
Mark Mentovai
2012/05/08 20:19:41
If the /* is unterminated and you reach the end of
Robert Sesek
2012/05/15 16:57:51
Done.
| |
452 } | |
453 | |
454 return false; | |
455 } | |
456 | |
457 Value* JSONParser::ParseNextToken() { | |
458 return ParseToken(GetNextToken()); | |
459 } | |
460 | |
461 Value* JSONParser::ParseToken(Token token) { | |
462 switch (token) { | |
463 case T_OBJECT_BEGIN: | |
464 return ConsumeDictionary(); | |
465 case T_ARRAY_BEGIN: | |
466 return ConsumeList(); | |
467 case T_STRING: | |
468 return ConsumeString(); | |
469 case T_NUMBER: | |
470 return ConsumeNumber(); | |
471 case T_BOOL_TRUE: | |
472 case T_BOOL_FALSE: | |
473 case T_NULL: | |
474 return ConsumeLiteral(); | |
475 default: | |
476 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
477 return NULL; | |
478 } | |
479 } | |
480 | |
481 Value* JSONParser::ConsumeDictionary() { | |
482 if (*pos_ != '{') { | |
483 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
484 return NULL; | |
485 } | |
486 | |
487 StackMarker depth_check(&stack_depth_); | |
488 if (depth_check.IsTooDeep()) { | |
489 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); | |
490 return NULL; | |
491 } | |
492 | |
493 scoped_ptr<DictionaryValue> dict(new DictionaryValue); | |
494 | |
495 NextChar(); | |
496 Token token = GetNextToken(); | |
497 while (token != T_OBJECT_END) { | |
498 if (token != T_STRING) { | |
499 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); | |
500 return NULL; | |
501 } | |
502 | |
503 // First consume the key. | |
504 StringBuilder key; | |
505 if (!ConsumeStringRaw(&key)) { | |
506 return NULL; | |
507 } | |
508 | |
509 // Read the separator. | |
510 NextChar(); | |
511 token = GetNextToken(); | |
512 if (token != T_OBJECT_PAIR_SEPARATOR) { | |
513 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
514 return NULL; | |
515 } | |
516 | |
517 // The token is the value. Ownership transfers to |dict|. | |
518 NextChar(); | |
519 Value* value = ParseNextToken(); | |
520 if (!value) { | |
521 return NULL; | |
522 } | |
523 | |
524 dict->SetWithoutPathExpansion(key.AsString(), value); | |
525 | |
526 NextChar(); | |
527 token = GetNextToken(); | |
528 if (token == T_LIST_SEPARATOR) { | |
529 NextChar(); | |
530 token = GetNextToken(); | |
531 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { | |
532 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); | |
533 return NULL; | |
534 } | |
535 } else if (token != T_OBJECT_END) { | |
536 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); | |
537 return NULL; | |
538 } | |
539 } | |
540 | |
541 if (token != T_OBJECT_END) | |
542 return NULL; | |
543 | |
544 return dict.release(); | |
545 } | |
546 | |
547 Value* JSONParser::ConsumeList() { | |
548 if (*pos_ != '[') { | |
549 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
550 return NULL; | |
551 } | |
552 | |
553 StackMarker depth_check(&stack_depth_); | |
554 if (depth_check.IsTooDeep()) { | |
555 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); | |
556 return NULL; | |
557 } | |
558 | |
559 scoped_ptr<ListValue> list(new ListValue); | |
560 | |
561 NextChar(); | |
562 Token token = GetNextToken(); | |
563 while (token != T_ARRAY_END) { | |
564 Value* item = ParseToken(token); | |
565 if (!item) { | |
566 // ReportError from deeper level. | |
567 return NULL; | |
568 } | |
569 | |
570 list->Append(item); | |
571 | |
572 NextChar(); | |
573 token = GetNextToken(); | |
574 if (token == T_LIST_SEPARATOR) { | |
575 NextChar(); | |
576 token = GetNextToken(); | |
577 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { | |
578 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); | |
579 return NULL; | |
580 } | |
581 } else if (token != T_ARRAY_END) { | |
582 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
583 return NULL; | |
584 } | |
585 } | |
586 | |
587 if (token != T_ARRAY_END) | |
588 return NULL; | |
589 | |
590 return list.release(); | |
591 } | |
592 | |
593 Value* JSONParser::ConsumeString() { | |
594 StringBuilder string; | |
595 if (!ConsumeStringRaw(&string)) | |
596 return NULL; | |
597 | |
598 // Create the Value representation, either using a hidden root, if configured | |
599 // to do so, and the string can be represented by StringPiece. | |
600 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) { | |
601 return new JSONStringValue(string.AsStringPiece()); | |
602 } else { | |
603 if (string.CanBeStringPiece()) | |
604 string.Convert(); | |
605 return new StringValue(string.AsString()); | |
606 } | |
607 } | |
608 | |
609 bool JSONParser::ConsumeStringRaw(StringBuilder* out) { | |
610 if (*pos_ != '"') { | |
611 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
612 return false; | |
613 } | |
614 | |
615 // StringBuilder will internally build a StringPiece unless a UTF-16 | |
616 // conversion occurs, at which point it will perform a copy into a | |
617 // std::string. | |
618 StringBuilder string(NextChar()); | |
619 | |
620 int length = end_pos_ - start_pos_; | |
621 int32 next_char = 0; | |
622 | |
623 DCHECK_EQ(*pos_, *(start_pos_ + index_)); | |
Mark Mentovai
2012/05/08 20:19:41
Why the *s?
Robert Sesek
2012/05/15 16:57:51
Debugging code removed.
| |
624 | |
625 while (CanConsume(1)) { | |
626 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. | |
627 CBU8_NEXT(start_pos_, index_, length, next_char); | |
628 if (next_char < 0 || !IsValidCharacter(next_char)) { | |
629 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); | |
630 return false; | |
631 } | |
632 | |
633 // If this character is an escape sequence... | |
634 if (next_char == '\\') { | |
635 // The input string will be adjusted (either by combining the two | |
636 // characters of an encoded escape sequence, or with a UTF conversion), | |
637 // so using StringPiece isn't possible -- force a conversion. | |
638 string.Convert(); | |
639 | |
640 if (!CanConsume(1)) { | |
641 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); | |
642 return false; | |
643 } | |
644 | |
645 switch (*NextChar()) { | |
646 // Allowed esape sequences: | |
647 case 'x': { // UTF-8 sequence. | |
648 if (!CanConsume(2)) { | |
649 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); | |
650 return false; | |
651 } | |
652 | |
653 int hex_digit = 0; | |
654 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { | |
655 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); | |
656 return false; | |
657 } | |
658 NextChar(); | |
659 | |
660 if (hex_digit < kExtendedASCIIStart) | |
661 string.Append(hex_digit); | |
662 else | |
663 DecodeUTF8(hex_digit, &string); | |
Mark Mentovai
2012/05/08 20:19:41
How is this supposed to work? Why don’t I see it i
Robert Sesek
2012/05/15 16:57:51
Documented. I don't want to remove this now (witho
| |
664 break; | |
665 } | |
666 case 'u': { // UTF-16 sequence. | |
667 // UTF units are of the form \uXXXX. | |
668 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. | |
669 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); | |
670 return false; | |
671 } | |
672 | |
673 // Skip the 'u'. | |
674 NextChar(); | |
675 | |
676 std::string utf8_units; | |
677 if (!DecodeUTF16(&utf8_units)) { | |
678 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); | |
679 return false; | |
680 } | |
681 | |
682 string.AppendString(utf8_units); | |
683 break; | |
684 } | |
685 case '"': | |
686 string.Append('"'); | |
687 break; | |
688 case '\\': | |
689 string.Append('\\'); | |
690 break; | |
691 case '/': | |
692 string.Append('/'); | |
693 break; | |
694 case 'b': | |
695 string.Append('\b'); | |
696 break; | |
697 case 'f': | |
698 string.Append('\f'); | |
699 break; | |
700 case 'n': | |
701 string.Append('\n'); | |
702 break; | |
703 case 'r': | |
704 string.Append('\r'); | |
705 break; | |
706 case 't': | |
707 string.Append('\t'); | |
708 break; | |
709 case 'v': // Not listed as valid escape sequence in the RFC. | |
710 string.Append('\v'); | |
711 break; | |
712 // All other escape squences are illegal. | |
713 default: | |
714 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); | |
715 return false; | |
716 } | |
717 } else if (next_char == '"') { | |
718 --index_; // Rewind by one because of CBU8_NEXT. | |
719 out->Swap(&string); | |
720 return true; | |
721 } else { | |
722 if (next_char < kExtendedASCIIStart) | |
723 string.Append(next_char); | |
724 else | |
725 DecodeUTF8(next_char, &string); | |
726 } | |
727 } | |
728 | |
729 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); | |
730 return false; | |
731 } | |
732 | |
733 // Entry is at the first X in \uXXXX. | |
734 bool JSONParser::DecodeUTF16(std::string* dest_string) { | |
735 if (!CanConsume(4)) | |
736 return false; | |
737 | |
738 // This is a 32-bit field because the shift operations in the | |
739 // conversion process below cause MSVC to error about "data loss." | |
740 // This only stores UTF-16 code units, though. | |
741 // Consume the UTF-16 code unit, which may be a high surrogate. | |
742 int code_unit16_high = 0; | |
743 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) | |
744 return false; | |
745 | |
746 // Only add 3, not 4, because at the end of this iteration, the parser has | |
747 // finished working with the last digit of the UTF sequence, meaning that | |
748 // the next spin of the loop will advance to the next byte. | |
749 NextNChars(3); | |
750 | |
751 // If this is a high surrogate, consume the next code unit to get the | |
752 // low surrogate. | |
753 int code_unit16_low = 0; | |
754 if (CBU16_IS_SURROGATE(code_unit16_high)) { | |
755 // Make sure this is the high surrogate. If not, it's an encoding | |
756 // error. | |
757 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) | |
758 return false; | |
759 | |
760 // Make sure that the token has more characters to consume the | |
761 // lower surrogate. | |
762 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. | |
763 return false; | |
764 if (*NextChar() != '\\' || *NextChar() != 'u') | |
765 return false; | |
766 | |
767 NextChar(); // Read past 'u'. | |
768 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) | |
769 return false; | |
770 | |
771 NextNChars(3); | |
772 | |
773 if (!CBU16_IS_SURROGATE(code_unit16_low) || | |
774 !CBU16_IS_TRAIL(code_unit16_low)) { | |
Mark Mentovai
2012/05/08 20:19:41
CBU16_IS_TRAIL implies CBU16_IS_SURROGATE, you onl
Robert Sesek
2012/05/15 16:57:51
Done.
| |
775 return false; | |
776 } | |
777 } else if (!CBU16_IS_SINGLE(code_unit16_high)) { | |
Mark Mentovai
2012/05/08 20:19:41
CBU16_IS_SINGLE is defined as !CBU16_IS_SURROGATE,
Robert Sesek
2012/05/15 16:57:51
Done.
| |
778 // If this is not a code point, it's an encoding error. | |
779 return false; | |
780 } | |
781 | |
782 // Convert the UTF-16 code units to a code point and then to a UTF-8 | |
783 // code unit sequence. | |
784 char code_point[8] = { 0 }; | |
785 size_t offset = 0; | |
786 if (!code_unit16_low) { | |
Mark Mentovai
2012/05/08 20:19:41
Rather than rechecking this, why don’t you do it i
Robert Sesek
2012/05/15 16:57:51
Done.
| |
787 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high); | |
788 } else { | |
789 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high, | |
790 code_unit16_low); | |
791 offset = 0; | |
792 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32); | |
Mark Mentovai
2012/05/08 20:19:41
And the same for this, except you’d put it in the
Robert Sesek
2012/05/15 16:57:51
Done.
| |
793 } | |
794 dest_string->append(code_point); | |
795 return true; | |
796 } | |
797 | |
798 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) { | |
799 // Anything outside of the basic ASCII plane will need to be decomposed from | |
800 // int32 to a multi-byte sequence. | |
801 if (point < kExtendedASCIIStart) { | |
802 dest->Append(point); | |
803 } else { | |
804 char utf8_units[4] = { 0 }; | |
805 int offset = 0; | |
806 CBU8_APPEND_UNSAFE(utf8_units, offset, point); | |
807 dest->Convert(); | |
808 dest->AppendString(utf8_units); | |
809 } | |
810 } | |
811 | |
// Parses a number: an optional '-', an integer part, an optional fraction and
// an optional exponent. Returns an integer Value when the text converts with
// StringToInt, otherwise a double Value when it converts to a finite double.
// Returns NULL on failure.
// NOTE(review): the final "return NULL" (text that converts to neither an int
// nor a finite double) does not call ReportError(), so error_code_ keeps
// whatever value it had -- confirm whether an error should be reported.
Value* JSONParser::ConsumeNumber() {
  const char* num_start = pos_;
  const int start_index = index_;
  int end_index = start_index;

  if (*pos_ == '-')
    NextChar();

  // Integer part; leading zeros are rejected here.
  if (!ReadInt(false)) {
    ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    return NULL;
  }
  end_index = index_;

  // The optional fraction part.
  if (*pos_ == '.') {
    if (!CanConsume(1)) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }
    NextChar();
    if (!ReadInt(true)) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }
    end_index = index_;
  }

  // Optional exponent part.
  if (*pos_ == 'e' || *pos_ == 'E') {
    NextChar();
    if (*pos_ == '-' || *pos_ == '+')
      NextChar();
    if (!ReadInt(true)) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }
    end_index = index_;
  }

  // ReadInt is greedy because numbers have no easily detectable sentinel,
  // so save off where the parser should be on exit (see Consume invariant at
  // the top of the header), then make sure the next token is one which is
  // valid.
  const char* exit_pos = pos_ - 1;
  int exit_index = index_ - 1;

  switch (GetNextToken()) {
    case T_OBJECT_END:
    case T_ARRAY_END:
    case T_LIST_SEPARATOR:
    case T_END_OF_INPUT:
      break;
    default:
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
  }

  // GetNextToken() may have skipped whitespace/comments; restore the cursor
  // to the last character of the number.
  pos_ = exit_pos;
  index_ = exit_index;

  StringPiece num_string(num_start, end_index - start_index);

  int num_int;
  if (StringToInt(num_string, &num_int))
    return Value::CreateIntegerValue(num_int);

  double num_double;
  if (base::StringToDouble(num_string.as_string(), &num_double) &&
      IsFinite(num_double)) {
    return Value::CreateDoubleValue(num_double);
  }

  return NULL;
}
887 | |
888 bool JSONParser::ReadInt(bool allow_leading_zeros) { | |
889 char first = *pos_; | |
890 int len = 0; | |
891 | |
892 char c = first; | |
893 while (CanConsume(1) && IsAsciiDigit(c)) { | |
894 c = *NextChar(); | |
895 ++len; | |
896 } | |
897 | |
898 if (len == 0) | |
899 return false; | |
900 | |
901 if (!allow_leading_zeros && len > 1 && first == '0') | |
902 return false; | |
903 | |
904 return true; | |
905 } | |
906 | |
907 Value* JSONParser::ConsumeLiteral() { | |
908 switch (*pos_) { | |
909 case 't': | |
Mark Mentovai
2012/05/08 20:19:41
I’d be more comfortable having kTrueLiteral[] = "t
Robert Sesek
2012/05/15 16:57:51
Done.
| |
910 if (!CanConsume(3) || !StringsAreEqual(pos_, "true", 4)) { | |
911 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
912 return NULL; | |
913 } | |
914 NextNChars(3); | |
915 return Value::CreateBooleanValue(true); | |
916 case 'f': | |
917 if (!CanConsume(4) || !StringsAreEqual(pos_, "false", 5)) { | |
918 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
919 return NULL; | |
920 } | |
921 NextNChars(4); | |
922 return Value::CreateBooleanValue(false); | |
923 case 'n': | |
924 if (!CanConsume(3) || !StringsAreEqual(pos_, "null", 4)) { | |
925 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); | |
926 return NULL; | |
927 } | |
928 NextNChars(3); | |
929 return Value::CreateNullValue(); | |
930 default: | |
931 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); | |
932 return NULL; | |
933 } | |
934 } | |
935 | |
936 // static | |
937 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { | |
938 return strncmp(one, two, len) == 0; | |
939 } | |
940 | |
941 void JSONParser::ReportError(JSONReader::JsonParseError code, | |
942 int column_adjust) { | |
943 error_code_ = code; | |
944 error_line_ = line_number_; | |
945 error_column_ = index_ - index_last_line_ + column_adjust; | |
946 } | |
947 | |
948 // static | |
949 std::string JSONParser::FormatErrorMessage(int line, int column, | |
950 const std::string& description) { | |
951 if (line || column) { | |
Mark Mentovai
2012/05/08 20:19:41
Do you ever have !line && column, or the other way
Robert Sesek
2012/05/15 16:57:51
No, but one could be zero.
| |
952 return StringPrintf("Line: %i, column: %i, %s", | |
953 line, column, description.c_str()); | |
954 } | |
955 return description; | |
956 } | |
957 | |
958 } // namespace internal | |
959 } // namespace base | |
OLD | NEW |