OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 #include "platform/json.h" | |
6 | |
7 #include "platform/assert.h" | |
8 #include "platform/globals.h" | |
9 #include "platform/utils.h" | |
10 #include "vm/os.h" | |
11 | |
12 namespace dart { | |
13 | |
14 JSONScanner::JSONScanner(const char* json_text) { | |
15 SetText(json_text); | |
16 } | |
17 | |
18 | |
19 void JSONScanner::SetText(const char* json_text) { | |
20 current_pos_ = json_text; | |
21 token_start_ = json_text; | |
22 token_length_ = 0; | |
23 token_ = TokenIllegal; | |
24 } | |
25 | |
26 | |
27 void JSONScanner::Recognize(Token t) { | |
28 ++current_pos_; | |
29 token_ = t; | |
30 } | |
31 | |
32 | |
33 bool JSONScanner::IsLetter(char ch) const { | |
34 return (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z')); | |
35 } | |
36 | |
37 | |
38 bool JSONScanner::IsDigit(char ch) const { | |
39 return ('0' <= ch) && (ch <= '9'); | |
40 } | |
41 | |
42 | |
43 bool JSONScanner::IsLiteral(const char* literal) { | |
44 int i = 0; | |
45 while ((literal[i] != '\0') && (current_pos_[i] == literal[i])) { | |
46 i++; | |
47 } | |
48 if ((literal[i] == '\0') && !IsLetter(current_pos_[i])) { | |
49 current_pos_ += i; | |
50 return true; | |
51 } | |
52 return false; | |
53 } | |
54 | |
55 | |
56 bool JSONScanner::IsStringLiteral(const char* literal) const { | |
57 if (token_ != TokenString) { | |
58 return false; | |
59 } | |
60 int i = 0; | |
61 while ((i < token_length_) && (token_start_[i] == literal[i])) { | |
62 i++; | |
63 } | |
64 return (i == token_length_) && (literal[i] == '\0'); | |
65 } | |
66 | |
67 | |
68 void JSONScanner::Skip(Token matching_token) { | |
69 while (!EOM() && (token_ != TokenIllegal)) { | |
70 Scan(); | |
71 if (token_ == TokenLBrace) { | |
72 Skip(TokenRBrace); | |
73 } else if (token_ == TokenLBrack) { | |
74 Skip(TokenRBrack); | |
75 } else if (token_ == matching_token) { | |
76 return; | |
77 } else if ((token_ == TokenRBrace) || (token_ == TokenRBrack)) { | |
78 // Mismatched brace or bracket. | |
79 token_ = TokenIllegal; | |
80 } | |
81 } | |
82 } | |
83 | |
84 | |
85 void JSONScanner::ScanString() { | |
86 ASSERT(*current_pos_ == '"'); | |
87 ++current_pos_; | |
88 token_start_ = current_pos_; | |
89 while (*current_pos_ != '"') { | |
90 if (*current_pos_ == '\0') { | |
91 token_length_ = 0; | |
92 token_ = TokenIllegal; | |
93 return; | |
94 } else if (*current_pos_ == '\\') { | |
95 ++current_pos_; | |
96 if (*current_pos_ == '"') { | |
97 // Consume escaped double quote. | |
98 ++current_pos_; | |
99 } | |
100 } else { | |
101 ++current_pos_; | |
102 } | |
103 } | |
104 token_ = TokenString; | |
105 token_length_ = current_pos_ - token_start_; | |
106 ++current_pos_; | |
107 } | |
108 | |
109 | |
110 void JSONScanner::ScanNumber() { | |
111 if (*current_pos_ == '-') { | |
112 ++current_pos_; | |
113 } | |
114 if (!IsDigit(*current_pos_)) { | |
115 token_ = TokenIllegal; | |
116 token_length_ = 0; | |
117 return; | |
118 } | |
119 while (IsDigit(*current_pos_)) { | |
120 ++current_pos_; | |
121 } | |
122 if ((*current_pos_ == '.') || | |
123 (*current_pos_ == 'e') || | |
124 (*current_pos_ == 'E')) { | |
125 // Floating point numbers not supported. | |
126 token_ = TokenIllegal; | |
127 token_length_ = 0; | |
128 return; | |
129 } | |
130 token_ = TokenInteger; | |
131 token_length_ = current_pos_ - token_start_; | |
132 } | |
133 | |
134 | |
135 void JSONScanner::Scan() { | |
136 while ((*current_pos_ == ' ') || | |
137 (*current_pos_ == '\t') || | |
138 (*current_pos_ == '\n')) { | |
139 ++current_pos_; | |
140 } | |
141 token_start_ = current_pos_; | |
142 if (*current_pos_ == '\0') { | |
143 token_length_ = 0; | |
144 token_ = TokenEOM; | |
145 return; | |
146 } | |
147 switch (*current_pos_) { | |
148 case '{': | |
149 Recognize(TokenLBrace); | |
150 break; | |
151 case '}': | |
152 Recognize(TokenRBrace); | |
153 break; | |
154 case '[': | |
155 Recognize(TokenLBrack); | |
156 break; | |
157 case ']': | |
158 Recognize(TokenRBrack); | |
159 break; | |
160 case ':': | |
161 Recognize(TokenColon); | |
162 break; | |
163 case ',': | |
164 Recognize(TokenComma); | |
165 break; | |
166 case '"': | |
167 ScanString(); | |
168 break; | |
169 case '0': | |
170 case '1': | |
171 case '2': | |
172 case '3': | |
173 case '4': | |
174 case '5': | |
175 case '6': | |
176 case '7': | |
177 case '8': | |
178 case '9': | |
179 case '-': | |
180 ScanNumber(); | |
181 break; | |
182 default: | |
183 if (IsLiteral("true")) { | |
184 token_ = TokenTrue; | |
185 token_length_ = 4; | |
186 } else if (IsLiteral("false")) { | |
187 token_ = TokenFalse; | |
188 token_length_ = 5; | |
189 } else if (IsLiteral("null")) { | |
190 token_ = TokenNull; | |
191 token_length_ = 4; | |
192 } else { | |
193 token_length_ = 0; | |
194 token_ = TokenIllegal; | |
195 } | |
196 } | |
197 } | |
198 | |
199 | |
200 JSONReader::JSONReader(const char* json_object) | |
201 : scanner_(json_object) { | |
202 Set(json_object); | |
203 } | |
204 | |
205 | |
206 void JSONReader::Set(const char* json_object) { | |
207 scanner_.SetText(json_object); | |
208 json_object_ = json_object; | |
209 error_ = false; | |
210 } | |
211 | |
212 | |
213 bool JSONReader::CheckMessage() { | |
214 scanner_.SetText(json_object_); | |
215 scanner_.Scan(); | |
216 CheckObject(); | |
217 return true; | |
218 } | |
219 | |
220 | |
221 void JSONReader::CheckValue() { | |
222 switch (scanner_.CurrentToken()) { | |
223 case JSONScanner::TokenLBrace: | |
224 CheckObject(); | |
225 break; | |
226 case JSONScanner::TokenLBrack: | |
227 CheckArray(); | |
228 break; | |
229 case JSONScanner::TokenString: { | |
230 // Check the encoding. | |
231 const char* s = ValueChars(); | |
232 int remaining = ValueLen(); | |
233 while (remaining > 0) { | |
234 if ((*s == '\n') || (*s == '\t')) { | |
235 OS::Print("Un-escaped character in JSON string: '%s'\n", | |
236 ValueChars()); | |
237 FATAL("illegal character in JSON string value"); | |
238 } | |
239 s++; | |
240 remaining--; | |
241 } | |
242 scanner_.Scan(); | |
243 break; | |
244 } | |
245 case JSONScanner::TokenInteger: | |
246 case JSONScanner::TokenTrue: | |
247 case JSONScanner::TokenFalse: | |
248 case JSONScanner::TokenNull: | |
249 scanner_.Scan(); | |
250 break; | |
251 default: | |
252 OS::Print("Malformed JSON: expected a value but got '%s'\n", | |
253 scanner_.TokenChars()); | |
254 FATAL("illegal JSON value found"); | |
255 } | |
256 } | |
257 | |
258 | |
// CHECK_TOKEN(token): in DEBUG builds, verifies that the scanner's current
// token equals |token|. On a mismatch it prints the expected token name, the
// text at the offending position, the offset of that position within the
// buffer and the whole buffer, and then fails an ASSERT. In non-DEBUG builds
// it does nothing.
//
// Both variants expand to a single do/while statement so the macro is safe
// to use as the body of an unbraced if/else. Must be used inside JSONReader
// member functions (it refers to scanner_ and json_object_).
#if defined (DEBUG)
#define CHECK_TOKEN(token) \
  do { \
    if (scanner_.CurrentToken() != (token)) { \
      OS::Print("Malformed JSON: expected %s but got '%s'\n", \
                #token, scanner_.TokenChars()); \
      intptr_t offset = scanner_.TokenChars() - this->json_object_; \
      OS::Print("Malformed JSON: expected %s at offset %" Pd \
                " of buffer:\n%s\n", \
                #token, offset, this->json_object_); \
      ASSERT(scanner_.CurrentToken() == (token)); \
    } \
  } while (0)
#else
#define CHECK_TOKEN(token) do { } while (0)
#endif
272 | |
273 | |
274 void JSONReader::CheckArray() { | |
275 CHECK_TOKEN(JSONScanner::TokenLBrack); | |
276 scanner_.Scan(); | |
277 while (scanner_.CurrentToken() != JSONScanner::TokenRBrack) { | |
278 CheckValue(); | |
279 if (scanner_.CurrentToken() != JSONScanner::TokenComma) { | |
280 break; | |
281 } | |
282 scanner_.Scan(); | |
283 } | |
284 CHECK_TOKEN(JSONScanner::TokenRBrack); | |
285 scanner_.Scan(); | |
286 } | |
287 | |
288 | |
289 void JSONReader::CheckObject() { | |
290 CHECK_TOKEN(JSONScanner::TokenLBrace); | |
291 scanner_.Scan(); | |
292 while (scanner_.CurrentToken() == JSONScanner::TokenString) { | |
293 scanner_.Scan(); | |
294 CHECK_TOKEN(JSONScanner::TokenColon); | |
295 scanner_.Scan(); | |
296 CheckValue(); | |
297 if (scanner_.CurrentToken() != JSONScanner::TokenComma) { | |
298 break; | |
299 } | |
300 scanner_.Scan(); | |
301 } | |
302 CHECK_TOKEN(JSONScanner::TokenRBrace); | |
303 scanner_.Scan(); | |
304 } | |
305 | |
306 #undef CHECK_TOKEN | |
307 | |
308 | |
309 bool JSONReader::Seek(const char* name) { | |
310 error_ = false; | |
311 scanner_.SetText(json_object_); | |
312 scanner_.Scan(); | |
313 if (scanner_.CurrentToken() != JSONScanner::TokenLBrace) { | |
314 error_ = true; | |
315 return false; | |
316 } | |
317 scanner_.Scan(); | |
318 if (scanner_.CurrentToken() == JSONScanner::TokenRBrace) { | |
319 return false; | |
320 } | |
321 while (scanner_.CurrentToken() == JSONScanner::TokenString) { | |
322 bool found = scanner_.IsStringLiteral(name); | |
323 scanner_.Scan(); | |
324 if (scanner_.CurrentToken() != JSONScanner::TokenColon) { | |
325 error_ = true; | |
326 return false; | |
327 } | |
328 scanner_.Scan(); | |
329 switch (scanner_.CurrentToken()) { | |
330 case JSONScanner::TokenString: | |
331 case JSONScanner::TokenInteger: | |
332 case JSONScanner::TokenLBrace: | |
333 case JSONScanner::TokenLBrack: | |
334 case JSONScanner::TokenTrue: | |
335 case JSONScanner::TokenFalse: | |
336 case JSONScanner::TokenNull: | |
337 // Found a legal value. | |
338 if (found) { | |
339 return true; | |
340 } | |
341 break; | |
342 default: | |
343 error_ = true; | |
344 return false; | |
345 } | |
346 // Skip the value. | |
347 if (scanner_.CurrentToken() == JSONScanner::TokenLBrace) { | |
348 scanner_.Skip(JSONScanner::TokenRBrace); | |
349 if (scanner_.CurrentToken() != JSONScanner::TokenRBrace) { | |
350 error_ = true; | |
351 return false; | |
352 } | |
353 } else if (scanner_.CurrentToken() == JSONScanner::TokenLBrack) { | |
354 scanner_.Skip(JSONScanner::TokenRBrack); | |
355 if (scanner_.CurrentToken() != JSONScanner::TokenRBrack) { | |
356 error_ = true; | |
357 return false; | |
358 } | |
359 } | |
360 scanner_.Scan(); // Value or closing brace or bracket. | |
361 if (scanner_.CurrentToken() == JSONScanner::TokenComma) { | |
362 scanner_.Scan(); | |
363 } else if (scanner_.CurrentToken() == JSONScanner::TokenRBrace) { | |
364 return false; | |
365 } else { | |
366 error_ = true; | |
367 return false; | |
368 } | |
369 } | |
370 error_ = true; | |
371 return false; | |
372 } | |
373 | |
374 | |
375 const char* JSONReader::EndOfObject() { | |
376 bool found = Seek("***"); // Look for illegally named value. | |
377 ASSERT(!found); | |
378 if (!found && !error_) { | |
379 const char* s = scanner_.TokenChars(); | |
380 ASSERT(*s == '}'); | |
381 return s; | |
382 } | |
383 return NULL; | |
384 } | |
385 | |
386 | |
387 JSONReader::JSONType JSONReader::Type() const { | |
388 if (error_) { | |
389 return kNone; | |
390 } | |
391 switch (scanner_.CurrentToken()) { | |
392 case JSONScanner::TokenString: | |
393 return kString; | |
394 case JSONScanner::TokenInteger: | |
395 return kInteger; | |
396 case JSONScanner::TokenLBrace: | |
397 return kObject; | |
398 case JSONScanner::TokenLBrack: | |
399 return kArray; | |
400 case JSONScanner::TokenTrue: | |
401 case JSONScanner::TokenFalse: | |
402 case JSONScanner::TokenNull: | |
403 return kLiteral; | |
404 default: | |
405 return kNone; | |
406 } | |
407 } | |
408 | |
409 | |
410 void JSONReader::GetRawValueChars(char* buf, intptr_t buflen) const { | |
411 if (Type() == kNone) { | |
412 return; | |
413 } | |
414 intptr_t max = buflen - 1; | |
415 if (ValueLen() < max) { | |
416 max = ValueLen(); | |
417 } | |
418 const char* val = ValueChars(); | |
419 intptr_t i = 0; | |
420 for (; i < max; i++) { | |
421 buf[i] = val[i]; | |
422 } | |
423 buf[i] = '\0'; | |
424 } | |
425 | |
426 | |
427 void JSONReader::GetDecodedValueChars(char* buf, intptr_t buflen) const { | |
428 if (Type() == kNone) { | |
429 return; | |
430 } | |
431 const intptr_t last_idx = buflen - 1; | |
432 const intptr_t value_len = ValueLen(); | |
433 const char* val = ValueChars(); | |
434 intptr_t buf_idx = 0; | |
435 intptr_t val_idx = 0; | |
436 while ((buf_idx < last_idx) && (val_idx < value_len)) { | |
437 char ch = val[val_idx]; | |
438 val_idx++; | |
439 buf[buf_idx] = ch; | |
440 if ((ch == '\\') && (val_idx < value_len)) { | |
441 switch (val[val_idx]) { | |
442 case '"': | |
443 case '\\': | |
444 case '/': | |
445 buf[buf_idx] = val[val_idx]; | |
446 val_idx++; | |
447 break; | |
448 case 'b': | |
449 buf[buf_idx] = '\b'; | |
450 val_idx++; | |
451 break; | |
452 case 'f': | |
453 buf[buf_idx] = '\f'; | |
454 val_idx++; | |
455 break; | |
456 case 'n': | |
457 buf[buf_idx] = '\n'; | |
458 val_idx++; | |
459 break; | |
460 case 'r': | |
461 buf[buf_idx] = '\r'; | |
462 val_idx++; | |
463 break; | |
464 case 't': | |
465 buf[buf_idx] = '\t'; | |
466 val_idx++; | |
467 break; | |
468 case 'u': | |
469 // \u00XX | |
470 // If the value is malformed or > 255, ignore and copy the | |
471 // encoded characters. | |
472 if ((val_idx < value_len - 4) && | |
473 (val[val_idx + 1] == '0') && (val[val_idx + 2] == '0') && | |
474 Utils::IsHexDigit(val[val_idx + 3]) && | |
475 Utils::IsHexDigit(val[val_idx + 4])) { | |
476 buf[buf_idx] = 16 * Utils::HexDigitToInt(val[val_idx + 3]) + | |
477 Utils::HexDigitToInt(val[val_idx + 4]); | |
478 val_idx += 5; | |
479 } | |
480 break; | |
481 default: | |
482 // Nothing. Copy the character after the backslash | |
483 // in the next loop iteration. | |
484 break; | |
485 } | |
486 } | |
487 buf_idx++; | |
488 } | |
489 buf[buf_idx] = '\0'; | |
490 } | |
491 | |
492 | |
493 TextBuffer::TextBuffer(intptr_t buf_size) { | |
494 ASSERT(buf_size > 0); | |
495 buf_ = reinterpret_cast<char*>(malloc(buf_size)); | |
496 buf_size_ = buf_size; | |
497 Clear(); | |
498 } | |
499 | |
500 | |
501 TextBuffer::~TextBuffer() { | |
502 free(buf_); | |
503 buf_ = NULL; | |
504 } | |
505 | |
506 | |
507 void TextBuffer::Clear() { | |
508 msg_len_ = 0; | |
509 buf_[0] = '\0'; | |
510 } | |
511 | |
512 | |
513 const char* TextBuffer::Steal() { | |
514 const char* r = buf_; | |
515 buf_ = NULL; | |
516 buf_size_ = 0; | |
517 msg_len_ = 0; | |
518 return r; | |
519 } | |
520 | |
521 | |
522 void TextBuffer::AddChar(char ch) { | |
523 EnsureCapacity(sizeof(ch)); | |
524 buf_[msg_len_] = ch; | |
525 msg_len_++; | |
526 buf_[msg_len_] = '\0'; | |
527 } | |
528 | |
529 | |
530 intptr_t TextBuffer::Printf(const char* format, ...) { | |
531 va_list args; | |
532 va_start(args, format); | |
533 intptr_t remaining = buf_size_ - msg_len_; | |
534 ASSERT(remaining >= 0); | |
535 intptr_t len = OS::VSNPrint(buf_ + msg_len_, remaining, format, args); | |
536 va_end(args); | |
537 if (len >= remaining) { | |
538 EnsureCapacity(len); | |
539 remaining = buf_size_ - msg_len_; | |
540 ASSERT(remaining > len); | |
541 va_list args2; | |
542 va_start(args2, format); | |
543 intptr_t len2 = OS::VSNPrint(buf_ + msg_len_, remaining, format, args2); | |
544 va_end(args2); | |
545 ASSERT(len == len2); | |
546 } | |
547 msg_len_ += len; | |
548 buf_[msg_len_] = '\0'; | |
549 return len; | |
550 } | |
551 | |
552 | |
553 // Write a UTF-16 code unit so it can be read by a JSON parser in a string | |
554 // literal. Use escape sequences for characters other than printable ASCII. | |
555 void TextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) { | |
556 switch (codeunit) { | |
557 case '"': | |
558 Printf("%s", "\\\""); | |
559 break; | |
560 case '\\': | |
561 Printf("%s", "\\\\"); | |
562 break; | |
563 case '/': | |
564 Printf("%s", "\\/"); | |
565 break; | |
566 case '\b': | |
567 Printf("%s", "\\b"); | |
568 break; | |
569 case '\f': | |
570 Printf("%s", "\\f"); | |
571 break; | |
572 case '\n': | |
573 Printf("%s", "\\n"); | |
574 break; | |
575 case '\r': | |
576 Printf("%s", "\\r"); | |
577 break; | |
578 case '\t': | |
579 Printf("%s", "\\t"); | |
580 break; | |
581 default: | |
582 if (codeunit < 0x20) { | |
583 // Encode character as \u00HH. | |
584 uint32_t digit2 = (codeunit >> 4) & 0xf; | |
585 uint32_t digit3 = (codeunit & 0xf); | |
586 Printf("\\u00%c%c", | |
587 digit2 > 9 ? 'A' + (digit2 - 10) : '0' + digit2, | |
588 digit3 > 9 ? 'A' + (digit3 - 10) : '0' + digit3); | |
589 } else if (codeunit > 127) { | |
590 // Encode character as \uHHHH. | |
591 uint32_t digit0 = (codeunit >> 12) & 0xf; | |
592 uint32_t digit1 = (codeunit >> 8) & 0xf; | |
593 uint32_t digit2 = (codeunit >> 4) & 0xf; | |
594 uint32_t digit3 = (codeunit & 0xf); | |
595 Printf("\\u%c%c%c%c", | |
596 digit0 > 9 ? 'A' + (digit0 - 10) : '0' + digit0, | |
597 digit1 > 9 ? 'A' + (digit1 - 10) : '0' + digit1, | |
598 digit2 > 9 ? 'A' + (digit2 - 10) : '0' + digit2, | |
599 digit3 > 9 ? 'A' + (digit3 - 10) : '0' + digit3); | |
600 } else { | |
601 AddChar(codeunit); | |
602 } | |
603 } | |
604 } | |
605 | |
606 | |
607 void TextBuffer::AddString(const char* s) { | |
608 Printf("%s", s); | |
609 } | |
610 | |
611 | |
612 void TextBuffer::AddEscapedString(const char* s) { | |
613 intptr_t len = strlen(s); | |
614 for (int i = 0; i < len; i++) { | |
615 EscapeAndAddCodeUnit(s[i]); | |
616 } | |
617 } | |
618 | |
619 | |
620 void TextBuffer::EnsureCapacity(intptr_t len) { | |
621 intptr_t remaining = buf_size_ - msg_len_; | |
622 if (remaining <= len) { | |
623 const int kBufferSpareCapacity = 64; // Somewhat arbitrary. | |
624 // TODO(turnidge): do we need to guard against overflow or other | |
625 // security issues here? Text buffers are used by the debugger | |
626 // to send user-controlled data (e.g. values of string variables) to | |
627 // the debugger front-end. | |
628 intptr_t new_size = buf_size_ + len + kBufferSpareCapacity; | |
629 char* new_buf = reinterpret_cast<char*>(realloc(buf_, new_size)); | |
630 ASSERT(new_buf != NULL); | |
631 buf_ = new_buf; | |
632 buf_size_ = new_size; | |
633 } | |
634 } | |
635 | |
636 } // namespace dart | |
OLD | NEW |