Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(389)

Side by Side Diff: src/json-parser.h

Issue 7134010: Specialize JSON parser to only check for SequentialAsciiString once. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/SConscript ('k') | src/json-parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 10 matching lines...) Expand all
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 #ifndef V8_JSON_PARSER_H_ 28 #ifndef V8_JSON_PARSER_H_
29 #define V8_JSON_PARSER_H_ 29 #define V8_JSON_PARSER_H_
30 30
31 #include "v8.h"
32
33 #include "char-predicates-inl.h"
34 #include "conversions.h"
35 #include "messages.h"
36 #include "spaces-inl.h"
31 #include "token.h" 37 #include "token.h"
32 38
33 namespace v8 { 39 namespace v8 {
34 namespace internal { 40 namespace internal {
35 41
36 // A simple json parser. 42 // A simple json parser.
43 template <bool seq_ascii>
37 class JsonParser BASE_EMBEDDED { 44 class JsonParser BASE_EMBEDDED {
38 public: 45 public:
39 static Handle<Object> Parse(Handle<String> source) { 46 static Handle<Object> Parse(Handle<String> source) {
40 return JsonParser().ParseJson(source); 47 return JsonParser().ParseJson(source);
41 } 48 }
42 49
43 static const int kEndOfString = -1; 50 static const int kEndOfString = -1;
44 51
45 private: 52 private:
46 // Parse a string containing a single JSON value. 53 // Parse a string containing a single JSON value.
47 Handle<Object> ParseJson(Handle<String> source); 54 Handle<Object> ParseJson(Handle<String> source);
48 55
49 inline void Advance() { 56 inline void Advance() {
50 position_++; 57 position_++;
51 if (position_ > source_length_) { 58 if (position_ > source_length_) {
52 c0_ = kEndOfString; 59 c0_ = kEndOfString;
53 } else if (is_sequential_ascii_) { 60 } else if (seq_ascii) {
54 c0_ = seq_source_->SeqAsciiStringGet(position_); 61 c0_ = seq_source_->SeqAsciiStringGet(position_);
55 } else { 62 } else {
56 c0_ = source_->Get(position_); 63 c0_ = source_->Get(position_);
57 } 64 }
58 } 65 }
59 66
60 // The JSON lexical grammar is specified in the ECMAScript 5 standard, 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard,
61 // section 15.12.1.1. The only allowed whitespace characters between tokens 68 // section 15.12.1.1. The only allowed whitespace characters between tokens
62 // are tab, carriage-return, newline and space. 69 // are tab, carriage-return, newline and space.
63 70
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
142 inline Isolate* isolate() { return isolate_; } 149 inline Isolate* isolate() { return isolate_; }
143 150
144 static const int kInitialSpecialStringSize = 1024; 151 static const int kInitialSpecialStringSize = 1024;
145 152
146 153
147 private: 154 private:
148 Handle<String> source_; 155 Handle<String> source_;
149 int source_length_; 156 int source_length_;
150 Handle<SeqAsciiString> seq_source_; 157 Handle<SeqAsciiString> seq_source_;
151 158
152 bool is_sequential_ascii_;
153 // begin and end position of scanned string or number 159 // begin and end position of scanned string or number
154 int beg_pos_; 160 int beg_pos_;
155 int end_pos_; 161 int end_pos_;
156 162
157 Isolate* isolate_; 163 Isolate* isolate_;
158 uc32 c0_; 164 uc32 c0_;
159 int position_; 165 int position_;
160 166
161 double number_; 167 double number_;
162 }; 168 };
163 169
170 template <bool seq_ascii>
171 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) {
172 isolate_ = source->map()->isolate();
173 source_ = Handle<String>(source->TryFlattenGetString());
174 source_length_ = source_->length() - 1;
175
176 // Optimized fast case where we only have ascii characters.
177 if (seq_ascii) {
178 seq_source_ = Handle<SeqAsciiString>::cast(source_);
179 }
180
181 // Set initial position right before the string.
182 position_ = -1;
183 // Advance to the first character (possibly EOS)
184 AdvanceSkipWhitespace();
185 Handle<Object> result = ParseJsonValue();
186 if (result.is_null() || c0_ != kEndOfString) {
187 // Parse failed. Current character is the unexpected token.
188
189 const char* message;
190 Factory* factory = isolate()->factory();
191 Handle<JSArray> array;
192
193 switch (c0_) {
194 case kEndOfString:
195 message = "unexpected_eos";
196 array = factory->NewJSArray(0);
197 break;
198 case '-':
199 case '0':
200 case '1':
201 case '2':
202 case '3':
203 case '4':
204 case '5':
205 case '6':
206 case '7':
207 case '8':
208 case '9':
209 message = "unexpected_token_number";
210 array = factory->NewJSArray(0);
211 break;
212 case '"':
213 message = "unexpected_token_string";
214 array = factory->NewJSArray(0);
215 break;
216 default:
217 message = "unexpected_token";
218 Handle<Object> name = LookupSingleCharacterStringFromCode(c0_);
219 Handle<FixedArray> element = factory->NewFixedArray(1);
220 element->set(0, *name);
221 array = factory->NewJSArrayWithElements(element);
222 break;
223 }
224
225 MessageLocation location(factory->NewScript(source),
226 position_,
227 position_ + 1);
228 Handle<Object> result = factory->NewSyntaxError(message, array);
229 isolate()->Throw(*result, &location);
230 return Handle<Object>::null();
231 }
232 return result;
233 }
234
235
236 // Parse any JSON value.
237 template <bool seq_ascii>
238 Handle<Object> JsonParser<seq_ascii>::ParseJsonValue() {
239 switch (c0_) {
240 case '"':
241 return ParseJsonString();
242 case '-':
243 case '0':
244 case '1':
245 case '2':
246 case '3':
247 case '4':
248 case '5':
249 case '6':
250 case '7':
251 case '8':
252 case '9':
253 return ParseJsonNumber();
254 case 'f':
255 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
256 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
257 AdvanceSkipWhitespace();
258 return isolate()->factory()->false_value();
259 } else {
260 return ReportUnexpectedCharacter();
261 }
262 case 't':
263 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
264 AdvanceGetChar() == 'e') {
265 AdvanceSkipWhitespace();
266 return isolate()->factory()->true_value();
267 } else {
268 return ReportUnexpectedCharacter();
269 }
270 case 'n':
271 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
272 AdvanceGetChar() == 'l') {
273 AdvanceSkipWhitespace();
274 return isolate()->factory()->null_value();
275 } else {
276 return ReportUnexpectedCharacter();
277 }
278 case '{':
279 return ParseJsonObject();
280 case '[':
281 return ParseJsonArray();
282 default:
283 return ReportUnexpectedCharacter();
284 }
285 }
286
287
288 // Parse a JSON object. Position must be right at '{'.
289 template <bool seq_ascii>
290 Handle<Object> JsonParser<seq_ascii>::ParseJsonObject() {
291 Handle<JSFunction> object_constructor(
292 isolate()->global_context()->object_function());
293 Handle<JSObject> json_object =
294 isolate()->factory()->NewJSObject(object_constructor);
295 ASSERT_EQ(c0_, '{');
296
297 AdvanceSkipWhitespace();
298 if (c0_ != '}') {
299 do {
300 if (c0_ != '"') return ReportUnexpectedCharacter();
301 Handle<String> key = ParseJsonSymbol();
302 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
303 AdvanceSkipWhitespace();
304 Handle<Object> value = ParseJsonValue();
305 if (value.is_null()) return ReportUnexpectedCharacter();
306
307 uint32_t index;
308 if (key->AsArrayIndex(&index)) {
309 SetOwnElement(json_object, index, value, kNonStrictMode);
310 } else if (key->Equals(isolate()->heap()->Proto_symbol())) {
311 SetPrototype(json_object, value);
312 } else {
313 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE);
314 }
315 } while (MatchSkipWhiteSpace(','));
316 if (c0_ != '}') {
317 return ReportUnexpectedCharacter();
318 }
319 }
320 AdvanceSkipWhitespace();
321 return json_object;
322 }
323
324 // Parse a JSON array. Position must be right at '['.
325 template <bool seq_ascii>
326 Handle<Object> JsonParser<seq_ascii>::ParseJsonArray() {
327 ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);
328 ZoneList<Handle<Object> > elements(4);
329 ASSERT_EQ(c0_, '[');
330
331 AdvanceSkipWhitespace();
332 if (c0_ != ']') {
333 do {
334 Handle<Object> element = ParseJsonValue();
335 if (element.is_null()) return ReportUnexpectedCharacter();
336 elements.Add(element);
337 } while (MatchSkipWhiteSpace(','));
338 if (c0_ != ']') {
339 return ReportUnexpectedCharacter();
340 }
341 }
342 AdvanceSkipWhitespace();
343 // Allocate a fixed array with all the elements.
344 Handle<FixedArray> fast_elements =
345 isolate()->factory()->NewFixedArray(elements.length());
346 for (int i = 0, n = elements.length(); i < n; i++) {
347 fast_elements->set(i, *elements[i]);
348 }
349 return isolate()->factory()->NewJSArrayWithElements(fast_elements);
350 }
351
352
353 template <bool seq_ascii>
354 Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() {
355 bool negative = false;
356 beg_pos_ = position_;
357 if (c0_ == '-') {
358 Advance();
359 negative = true;
360 }
361 if (c0_ == '0') {
362 Advance();
363 // Prefix zero is only allowed if it's the only digit before
364 // a decimal point or exponent.
365 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter();
366 } else {
367 int i = 0;
368 int digits = 0;
369 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
370 do {
371 i = i * 10 + c0_ - '0';
372 digits++;
373 Advance();
374 } while (c0_ >= '0' && c0_ <= '9');
375 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
376 number_ = (negative ? -i : i);
377 SkipWhitespace();
378 return isolate()->factory()->NewNumber(number_);
379 }
380 }
381 if (c0_ == '.') {
382 Advance();
383 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
384 do {
385 Advance();
386 } while (c0_ >= '0' && c0_ <= '9');
387 }
388 if (AsciiAlphaToLower(c0_) == 'e') {
389 Advance();
390 if (c0_ == '-' || c0_ == '+') Advance();
391 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
392 do {
393 Advance();
394 } while (c0_ >= '0' && c0_ <= '9');
395 }
396 int length = position_ - beg_pos_;
397 if (seq_ascii) {
398 Vector<const char> chars(seq_source_->GetChars() + beg_pos_, length);
399 number_ = StringToDouble(isolate()->unicode_cache(),
400 chars,
401 NO_FLAGS, // Hex, octal or trailing junk.
402 OS::nan_value());
403 } else {
404 Vector<char> buffer = Vector<char>::New(length);
405 String::WriteToFlat(*source_, buffer.start(), beg_pos_, position_);
406 Vector<const char> result =
407 Vector<const char>(reinterpret_cast<const char*>(buffer.start()),
408 length);
409 number_ = StringToDouble(isolate()->unicode_cache(),
410 result,
411 NO_FLAGS, // Hex, octal or trailing junk.
412 0.0);
413 buffer.Dispose();
414 }
415 SkipWhitespace();
416 return isolate()->factory()->NewNumber(number_);
417 }
418
419 template <bool seq_ascii>
420 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString() {
421 // The currently scanned ascii characters.
422 Handle<String> ascii(isolate()->factory()->NewSubString(source_,
423 beg_pos_,
424 position_));
425 Handle<String> two_byte =
426 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,
427 NOT_TENURED);
428 Handle<SeqTwoByteString> seq_two_byte =
429 Handle<SeqTwoByteString>::cast(two_byte);
430
431 int allocation_count = 1;
432 int count = 0;
433
434 while (c0_ != '"') {
435 // Create new seq string
436 if (count >= kInitialSpecialStringSize * allocation_count) {
437 allocation_count = allocation_count * 2;
438 int new_size = allocation_count * kInitialSpecialStringSize;
439 Handle<String> new_two_byte =
440 isolate()->factory()->NewRawTwoByteString(new_size,
441 NOT_TENURED);
442 uc16* char_start =
443 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();
444 String::WriteToFlat(*seq_two_byte, char_start, 0, count);
445 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);
446 }
447
448 // Check for control character (0x00-0x1f) or unterminated string (<0).
449 if (c0_ < 0x20) return Handle<String>::null();
450 if (c0_ != '\\') {
451 seq_two_byte->SeqTwoByteStringSet(count++, c0_);
452 Advance();
453 } else {
454 Advance();
455 switch (c0_) {
456 case '"':
457 case '\\':
458 case '/':
459 seq_two_byte->SeqTwoByteStringSet(count++, c0_);
460 break;
461 case 'b':
462 seq_two_byte->SeqTwoByteStringSet(count++, '\x08');
463 break;
464 case 'f':
465 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c');
466 break;
467 case 'n':
468 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a');
469 break;
470 case 'r':
471 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d');
472 break;
473 case 't':
474 seq_two_byte->SeqTwoByteStringSet(count++, '\x09');
475 break;
476 case 'u': {
477 uc32 value = 0;
478 for (int i = 0; i < 4; i++) {
479 Advance();
480 int digit = HexValue(c0_);
481 if (digit < 0) {
482 return Handle<String>::null();
483 }
484 value = value * 16 + digit;
485 }
486 seq_two_byte->SeqTwoByteStringSet(count++, value);
487 break;
488 }
489 default:
490 return Handle<String>::null();
491 }
492 Advance();
493 }
494 }
495 // Advance past the last '"'.
496 ASSERT_EQ('"', c0_);
497 AdvanceSkipWhitespace();
498
499 // Shrink the string to our length.
500 if (isolate()->heap()->InNewSpace(*seq_two_byte)) {
501 isolate()->heap()->new_space()->
502 template ShrinkStringAtAllocationBoundary<SeqTwoByteString>(
503 *seq_two_byte, count);
504 } else {
505 int string_size = SeqTwoByteString::SizeFor(count);
506 int allocated_string_size =
507 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);
508 int delta = allocated_string_size - string_size;
509 Address start_filler_object = seq_two_byte->address() + string_size;
510 seq_two_byte->set_length(count);
511 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
512 }
513 return isolate()->factory()->NewConsString(ascii, seq_two_byte);
514 }
515
516 template <bool seq_ascii>
517 template <bool is_symbol>
518 Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
519 ASSERT_EQ('"', c0_);
520 Advance();
521 beg_pos_ = position_;
522 // Fast case for ascii only without escape characters.
523 while (c0_ != '"') {
524 // Check for control character (0x00-0x1f) or unterminated string (<0).
525 if (c0_ < 0x20) return Handle<String>::null();
526 if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) {
527 Advance();
528 } else {
529 return this->SlowScanJsonString();
530 }
531 }
532 ASSERT_EQ('"', c0_);
533 end_pos_ = position_;
534 // Advance past the last '"'.
535 AdvanceSkipWhitespace();
536 if (seq_ascii && is_symbol) {
537 return isolate()->factory()->LookupAsciiSymbol(seq_source_,
538 beg_pos_,
539 end_pos_ - beg_pos_);
540 } else {
541 return isolate()->factory()->NewSubString(source_, beg_pos_, end_pos_);
542 }
543 }
544
164 } } // namespace v8::internal 545 } } // namespace v8::internal
165 546
166 #endif // V8_JSON_PARSER_H_ 547 #endif // V8_JSON_PARSER_H_
OLDNEW
« no previous file with comments | « src/SConscript ('k') | src/json-parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698