Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(74)

Side by Side Diff: src/json-parser.cc

Issue 7039037: Create stand-alone json parser (including scanner). (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 9 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 #include "v8.h"
29
30 #include "char-predicates-inl.h"
31 #include "conversions.h"
32 #include "json-parser.h"
33 #include "messages.h"
34 #include "spaces.h"
35
36 namespace v8 {
37 namespace internal {
38
39
// Parses |source| as one complete JSON text.
// Returns the parsed value on success. On failure it builds a SyntaxError
// describing the offending token, throws it on the isolate and returns a
// null handle.
40 Handle<Object> JsonParser::ParseJson(Handle<String> source) {
// Scan the string returned by TryFlattenGetString() rather than |source|.
41 source_ = Handle<String>(source->TryFlattenGetString());
// NOTE(review): stored as length - 1; presumably the index of the last
// character as consumed by Advance() (not visible here) - confirm.
42 source_length_ = source_->length() - 1;
43
44 // Optimized fast case where we only have ascii characters.
45 if (source_->IsSeqAsciiString()) {
46 is_sequential_ascii_ = true;
Lasse Reichstein 2011/05/19 07:27:40 This is not a property that is guaranteed to be pr
Rico 2011/05/23 18:18:12 As discussed offline, this is not the case right?
Lasse Reichstein 2011/05/24 07:28:34 That what is not the case? That strings can change
Rico 2011/05/24 08:49:24 OK, changed so that we reinitialize this variable
47 seq_source_ = Handle<SeqAsciiString>::cast(source_);
48 } else {
49 is_sequential_ascii_ = false;
50 }
51
52 // Set initial position right before the string.
53 position_ = -1;
54 // Advance to the first character (possibly EOS)
55 Advance();
// Prime the scanner: Next() copies next_ into current_ and scans a fresh
// lookahead token, so after this call next_ holds the first real token.
56 Next();
57 Handle<Object> result = ParseJsonValue();
// A null result, or any token other than EOS after the value, is a failure.
58 if (result.is_null() || Next() != Token::EOS) {
59 // Parse failed. Scanner's current token is the unexpected token.
60 Token::Value token = current_.token;
61
62 const char* message;
63 const char* name_opt = NULL;
64
// Pick the message name for the SyntaxError. Only the default case also
// supplies the token text (name_opt) as a message argument.
65 switch (token) {
66 case Token::EOS:
67 message = "unexpected_eos";
68 break;
69 case Token::NUMBER:
70 message = "unexpected_token_number";
71 break;
72 case Token::STRING:
73 message = "unexpected_token_string";
74 break;
75 case Token::IDENTIFIER:
76 case Token::FUTURE_RESERVED_WORD:
77 message = "unexpected_token_identifier";
78 break;
79 default:
80 message = "unexpected_token";
81 name_opt = Token::String(token);
82 ASSERT(name_opt != NULL);
83 break;
84 }
85
86 Factory* factory = isolate()->factory();
// The error location spans the unexpected token inside a script created from
// the original |source| handle.
87 MessageLocation location(factory->NewScript(source),
88 current_.beg_pos,
89 current_.end_pos);
// Message arguments: an empty array, or a one-element array holding the
// token name when one was selected above.
90 Handle<JSArray> array;
91 if (name_opt == NULL) {
92 array = factory->NewJSArray(0);
93 } else {
94 Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt));
95 Handle<FixedArray> element = factory->NewFixedArray(1);
96 element->set(0, *name);
97 array = factory->NewJSArrayWithElements(element);
98 }
// NOTE(review): this |result| shadows the outer |result| declared before the
// if statement; a distinct name would be clearer.
99 Handle<Object> result = factory->NewSyntaxError(message, array);
100 isolate()->Throw(*result, &location);
101 return Handle<Object>::null();
102 }
103 return result;
104 }
105
106
107 // Parse any JSON value.
108 Handle<Object> JsonParser::ParseJsonValue() {
109 Token::Value token = Next();
110 switch (token) {
111 case Token::STRING:
112 return GetString(false);
113 case Token::NUMBER:
114 return isolate()->factory()->NewNumber(number_);
115 case Token::FALSE_LITERAL:
116 return isolate()->factory()->false_value();
117 case Token::TRUE_LITERAL:
118 return isolate()->factory()->true_value();
119 case Token::NULL_LITERAL:
120 return isolate()->factory()->null_value();
121 case Token::LBRACE:
122 return ParseJsonObject();
123 case Token::LBRACK:
124 return ParseJsonArray();
125 default:
126 return ReportUnexpectedToken();
127 }
128 }
129
130
131 // Parse a JSON object. Scanner must be right after '{' token.
// Returns the newly created object. On a malformed member it returns the
// result of ReportUnexpectedToken(), or a null handle when a nested value
// failed to parse.
132 Handle<Object> JsonParser::ParseJsonObject() {
// The result is created with the global context's Object function.
133 Handle<JSFunction> object_constructor(
134 isolate()->global_context()->object_function());
135 Handle<JSObject> json_object =
136 isolate()->factory()->NewJSObject(object_constructor);
137
// Empty object: just consume the closing brace.
138 if (Peek() == Token::RBRACE) {
139 Next();
140 } else {
// One iteration per "key : value" member.
141 do {
// Every member must start with a string key.
142 if (Next() != Token::STRING) {
143 return ReportUnexpectedToken();
144 }
145 Handle<String> key = GetString(true);
Lasse Reichstein 2011/05/19 07:27:40 How about having two functions: GetString() and Ge
Rico 2011/05/23 18:18:12 Done.
146 if (Next() != Token::COLON) {
147 return ReportUnexpectedToken();
148 }
149
150 Handle<Object> value = ParseJsonValue();
151 if (value.is_null()) return Handle<Object>::null();
152
// Store the member: keys that are array indices become elements, a key equal
// to Proto_symbol sets the prototype, and every other key becomes a local
// property with attributes NONE.
153 uint32_t index;
154 if (key->AsArrayIndex(&index)) {
155 SetOwnElement(json_object, index, value, kNonStrictMode);
156 } else if (key->Equals(isolate()->heap()->Proto_symbol())) {
157 SetPrototype(json_object, value);
158 } else {
159 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE);
160 }
// Keep going while members are separated by commas.
161 } while (Next() == Token::COMMA);
// The token that ended the loop must be the closing brace.
162 if (current_.token != Token::RBRACE) {
163 return ReportUnexpectedToken();
164 }
165 }
166 return json_object;
167 }
168
169 // Parse a JSON array. Scanner must be right after '[' token.
170 Handle<Object> JsonParser::ParseJsonArray() {
171 ZoneScope zone_scope(DELETE_ON_EXIT);
172 ZoneList<Handle<Object> > elements(4);
173
174 Token::Value token = Peek();
175 if (token == Token::RBRACK) {
176 Next();
177 } else {
178 do {
179 Handle<Object> element = ParseJsonValue();
180 if (element.is_null()) return Handle<Object>::null();
181 elements.Add(element);
182 token = Next();
183 } while (token == Token::COMMA);
184 if (token != Token::RBRACK) {
185 return ReportUnexpectedToken();
186 }
187 }
188
189 // Allocate a fixed array with all the elements.
190 Handle<FixedArray> fast_elements =
191 isolate()->factory()->NewFixedArray(elements.length());
192
193 for (int i = 0, n = elements.length(); i < n; i++) {
194 fast_elements->set(i, *elements[i]);
195 }
196
197 return isolate()->factory()->NewJSArrayWithElements(fast_elements);
198 }
199
200
201 Token::Value JsonParser::Next() {
202 current_ = next_;
203 ScanJson();
204 return current_.token;
205 }
206
207 void JsonParser::ScanJson() {
208 Token::Value token;
209 do {
210 // Remember the position of the next token
211 next_.beg_pos = position_;
212 switch (c0_) {
213 case '\t':
214 case '\r':
215 case '\n':
216 case ' ':
217 Advance();
218 token = Token::WHITESPACE;
219 break;
220 case '{':
221 Advance();
222 token = Token::LBRACE;
223 break;
224 case '}':
225 Advance();
226 token = Token::RBRACE;
227 break;
228 case '[':
229 Advance();
230 token = Token::LBRACK;
231 break;
232 case ']':
233 Advance();
234 token = Token::RBRACK;
235 break;
236 case ':':
237 Advance();
238 token = Token::COLON;
239 break;
240 case ',':
241 Advance();
242 token = Token::COMMA;
243 break;
244 case '"':
245 token = ScanJsonString();
246 break;
247 case '-':
248 case '0':
249 case '1':
250 case '2':
251 case '3':
252 case '4':
253 case '5':
254 case '6':
255 case '7':
256 case '8':
257 case '9':
258 token = ScanJsonNumber();
259 break;
260 case 't':
261 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
262 break;
263 case 'f':
264 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
265 break;
266 case 'n':
267 token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
268 break;
269 default:
270 if (c0_ < 0) {
271 Advance();
272 token = Token::EOS;
273 } else {
274 Advance();
275 token = Token::ILLEGAL;
276 }
277 }
278 } while (token == Token::WHITESPACE);
279
280 next_.end_pos = position_;
281 next_.token = token;
282 }
283
284
285 Token::Value JsonParser::ScanJsonIdentifier(const char* text,
286 Token::Value token) {
287 while (*text != '\0') {
288 if (c0_ != *text) return Token::ILLEGAL;
289 Advance();
290 text++;
291 }
292 return token;
293 }
294
295
// Scans a JSON number beginning at the current character (ScanJson calls
// this on a minus sign or a digit). On success the value is stored in
// number_ and Token::NUMBER is returned; malformed input yields
// Token::ILLEGAL.
296 Token::Value JsonParser::ScanJsonNumber() {
297 bool negative = false;
298
299 if (c0_ == '-') {
300 Advance();
301 negative = true;
302 }
303 if (c0_ == '0') {
304 Advance();
305 // Prefix zero is only allowed if it's the only digit before
306 // a decimal point or exponent.
307 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
308 } else {
// Fast path: accumulate the integer digits directly in an int.
309 int i = 0;
310 int digits = 0;
// The integer part must start with a digit from 1 to 9.
311 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
312 do {
313 i = i * 10 + c0_ - '0';
314 digits++;
315 Advance();
316 } while (c0_ >= '0' && c0_ <= '9');
// The accumulated value is only used for plain integers of at most nine
// digits (so it stays below 10^9); everything else is converted below.
317 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
318 number_ = (negative ? -i : i);
319 return Token::NUMBER;
320 }
321 }
// Optional fraction: a dot followed by at least one digit.
322 if (c0_ == '.') {
323 Advance();
324 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
325 do {
326 Advance();
327 } while (c0_ >= '0' && c0_ <= '9');
328 }
// Optional exponent: e or E, an optional sign, then at least one digit.
329 if (AsciiAlphaToLower(c0_) == 'e') {
330 Advance();
331 if (c0_ == '-' || c0_ == '+') Advance();
332 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
333 do {
334 Advance();
335 } while (c0_ >= '0' && c0_ <= '9');
336 }
// General path: convert the scanned text, from next_.beg_pos up to
// position_, with StringToDouble. A sequential ascii source is read in
// place; any other source goes through a substring of source_.
337 if (is_sequential_ascii_) {
338 Vector<const char> chars(seq_source_->GetChars() + next_.beg_pos,
339 position_ - next_.beg_pos);
340 number_ = StringToDouble(isolate()->unicode_cache(),
341 chars,
342 NO_FLAGS, // Hex, octal or trailing junk.
343 OS::nan_value());
344 } else {
Lasse Reichstein 2011/05/19 07:27:40 Ick. Why create a heap string? Just make a buffer
Rico 2011/05/23 18:18:12 Fast atoi? this is a double. I added a conversion
345 Handle<String> value = isolate()->factory()->NewSubString(
346 source_, next_.beg_pos, position_);
347 number_ = StringToDouble(isolate()->unicode_cache(),
348 *value,
349 NO_FLAGS, // Hex, octal or trailing junk.
350 OS::nan_value());
351 }
352 return Token::NUMBER;
353 }
354
// Slow path of string scanning, entered from ScanJsonString() when it meets
// a backslash or a character that is not below kMaxAsciiCharCode.
// The characters scanned so far are taken as a substring of source_; the
// rest of the string is decoded (including escapes) into a two-byte string.
// On success string_val_ is set to a cons string of the two parts and
// Token::STRING is returned. Control characters, end of input and invalid
// escapes yield Token::ILLEGAL.
355 Token::Value JsonParser::SlowScanJsonString() {
356 // The currently scanned ascii characters.
357 Handle<String> ascii(isolate()->factory()->NewSubString(source_,
358 next_.beg_pos + 1,
359 position_));
360 Handle<String> two_byte =
361 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,
362 NOT_TENURED);
363 ASSERT(two_byte->IsSeqTwoByteString());
Lasse Reichstein 2011/05/19 07:27:40 Odd assert. How could that not happen? Consider ch
Rico 2011/05/23 18:18:12 Done.
364 Handle<SeqTwoByteString> seq_two_byte =
365 Handle<SeqTwoByteString>::cast(two_byte);
366
// allocation_count: current buffer capacity in units of
// kInitialSpecialStringSize. count: number of characters written so far.
367 int allocation_count = 1;
368 int count = 0;
369
370 while (c0_ != '"') {
371 // Create new seq string
// The buffer is full: allocate one that is kInitialSpecialStringSize larger
// and copy the characters written so far into it.
372 if (count >= kInitialSpecialStringSize * allocation_count) {
373 allocation_count++;
374 int new_size = allocation_count * kInitialSpecialStringSize;
375 Handle<String> new_two_byte =
376 isolate()->factory()->NewRawTwoByteString(new_size,
377 NOT_TENURED);
378 uc16* char_start =
379 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();
380 String::WriteToFlat(*seq_two_byte, char_start, 0, count);
381 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);
382 }
383
384 // Check for control character (0x00-0x1f) or unterminated string (<0).
385 if (c0_ < 0x20) return Token::ILLEGAL;
// Ordinary character: copy it through unchanged.
386 if (c0_ != '\\') {
387 seq_two_byte->SeqTwoByteStringSet(count++, c0_);
388 Advance();
389 } else {
// Escape sequence: skip the backslash and decode the escape character.
390 Advance();
391 switch (c0_) {
392 case '"':
393 case '\\':
394 case '/':
395 seq_two_byte->SeqTwoByteStringSet(count++, c0_);
396 break;
397 case 'b':
398 seq_two_byte->SeqTwoByteStringSet(count++, '\x08');
399 break;
400 case 'f':
401 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c');
402 break;
403 case 'n':
404 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a');
405 break;
406 case 'r':
407 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d');
408 break;
409 case 't':
410 seq_two_byte->SeqTwoByteStringSet(count++, '\x09');
411 break;
// Unicode escape: exactly four hex digits, accumulated into one code unit.
412 case 'u': {
413 uc32 value = 0;
414 for (int i = 0; i < 4; i++) {
415 Advance();
416 int digit = HexValue(c0_);
417 if (digit < 0) {
418 return Token::ILLEGAL;
419 }
420 value = value * 16 + digit;
421 }
422 seq_two_byte->SeqTwoByteStringSet(count++, value);
423 break;
424 }
425 default:
426 return Token::ILLEGAL;
427 }
// Step past the last character of the escape sequence.
428 Advance();
429 }
430 }
431 // Advance past the last '"'.
432 ASSERT_EQ('"', c0_);
433 Advance();
434
435 // Shrink the string to our length.
436 isolate()->heap()->
437 new_space()->
438 ShrinkStringAtAllocationBoundary<SeqTwoByteString>(*seq_two_byte,
439 count);
Lasse Reichstein 2011/05/19 07:27:40 If the ascii string is short (e.g., it's shorter t
Rico 2011/05/23 18:18:12 Indeed, this is one of those optimizations for the
440 string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte);
441 return Token::STRING;
442 }
443
444
443
444
// Scans a string token; the current character must be the opening quote.
// Fast path: as long as every character is neither a backslash nor at or
// above kMaxAsciiCharCode, the scanner only advances and leaves string_val_
// null, so the text is later taken directly from the source by GetString().
// Anything else is handed to SlowScanJsonString(). Returns Token::STRING, or
// Token::ILLEGAL for a control character or unterminated string.
445 Token::Value JsonParser::ScanJsonString() {
446 ASSERT_EQ('"', c0_);
447 // Set string_val to null. If string_val is not set we assume an
448 // ascii string beginning at next_.beg_pos + 1 to next_.end_pos - 1.
449 string_val_ = Handle<String>::null();
450 Advance();
451 // Fast case for ascii only without escape characters.
452 while (c0_ != '"') {
453 // Check for control character (0x00-0x1f) or unterminated string (<0).
454 if (c0_ < 0x20) return Token::ILLEGAL;
// NOTE(review): the strict comparison also sends a character equal to
// kMaxAsciiCharCode to the slow path - confirm whether <= was intended.
455 if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) {
456 Advance();
457 } else {
458 return SlowScanJsonString();
Lasse Reichstein 2011/05/19 07:27:40 The SlowScanJsonString creates a TwoByte string. I
Rico 2011/05/23 18:18:12 Yes
459 }
460 }
461 ASSERT_EQ('"', c0_);
462 // Advance past the last '"'.
463 Advance();
464 return Token::STRING;
465 }
466
467
// Returns the string value of the current STRING token.
//  - If the slow scanner stored a value in string_val_, that value is
//    returned as is (is_symbol is not consulted on this path).
//  - If the source is sequential ascii and is_symbol is set, the characters
//    between the quotes are looked up with LookupAsciiSymbol.
//  - Otherwise a substring of source_ between the quotes is returned.
468 Handle<String> JsonParser::GetString(bool is_symbol) {
469 // We have a non ascii string, return that.
470 if (!string_val_.is_null()) return string_val_;
Lasse Reichstein 2011/05/19 07:27:40 That does not make it a symbol if it isn't already
Rico 2011/05/23 18:18:12 renaming hint_symbol
471
472 if (is_sequential_ascii_ && is_symbol) {
// NOTE(review): |seq| is never used below; the lookup reads seq_source_.
473 Handle<SeqAsciiString> seq = Handle<SeqAsciiString>::cast(source_);
474 // The current token includes the '"' in both ends.
475 int length = current_.end_pos - current_.beg_pos - 2;
476 return isolate()->factory()->LookupAsciiSymbol(seq_source_,
477 current_.beg_pos + 1,
478 length);
479 }
480 // The current token includes the '"' in both ends.
481 return isolate()->factory()->NewSubString(
Lasse Reichstein 2011/05/19 07:27:40 This also doesn't make it a symbol even if is_symb
Rico 2011/05/23 18:18:12 Done.
482 source_, current_.beg_pos + 1, current_.end_pos - 1);
483 }
484
485 } } // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698