Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(743)

Side by Side Diff: src/scanner.cc

Issue 7739020: Rename scanner.* to scanner-character-streams.* and scanner-base.* to scanner.* (Closed) Base URL: git://github.com/v8/v8.git@master
Patch Set: rename scanner-base.* to scanner.* Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution. 11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its 12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived 13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission. 14 // from this software without specific prior written permission.
15 // 15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 #include "v8.h" 28 // Features shared by parsing and pre-parsing scanners.
29 29
30 #include "ast.h"
31 #include "handles.h"
32 #include "scanner.h" 30 #include "scanner.h"
33 #include "unicode-inl.h" 31
32 #include "../include/v8stdint.h"
33 #include "char-predicates-inl.h"
34 34
35 namespace v8 { 35 namespace v8 {
36 namespace internal { 36 namespace internal {
37 37
38 // ---------------------------------------------------------------------------- 38 // ----------------------------------------------------------------------------
39 // BufferedUC16CharacterStreams 39 // Scanner
40 40
41 BufferedUC16CharacterStream::BufferedUC16CharacterStream() 41 Scanner::Scanner(UnicodeCache* unicode_cache)
42 : UC16CharacterStream(), 42 : unicode_cache_(unicode_cache) { }
43 pushback_limit_(NULL) { 43
44 // Initialize buffer as being empty. First read will fill the buffer. 44
45 buffer_cursor_ = buffer_; 45 uc32 Scanner::ScanHexNumber(int expected_length) {
46 buffer_end_ = buffer_; 46 ASSERT(expected_length <= 4); // prevent overflow
47 } 47
48 48 uc32 digits[4] = { 0, 0, 0, 0 };
49 BufferedUC16CharacterStream::~BufferedUC16CharacterStream() { } 49 uc32 x = 0;
50 50 for (int i = 0; i < expected_length; i++) {
51 void BufferedUC16CharacterStream::PushBack(uc32 character) { 51 digits[i] = c0_;
52 if (character == kEndOfInput) { 52 int d = HexValue(c0_);
53 pos_--; 53 if (d < 0) {
54 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
55 // should be illegal, but other JS VMs just return the
56 // non-escaped version of the original character.
57
58 // Push back digits that we have advanced past.
59 for (int j = i-1; j >= 0; j--) {
60 PushBack(digits[j]);
61 }
62 return -1;
63 }
64 x = x * 16 + d;
65 Advance();
66 }
67
68 return x;
69 }
70
71
72
73 // ----------------------------------------------------------------------------
74 // JavaScriptScanner
75
76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants)
77 : Scanner(scanner_contants),
78 octal_pos_(Location::invalid()),
79 harmony_block_scoping_(false) { }
80
81
82 void JavaScriptScanner::Initialize(UC16CharacterStream* source) {
83 source_ = source;
84 // Need to capture identifiers in order to recognize "get" and "set"
85 // in object literals.
86 Init();
87 // Skip initial whitespace allowing HTML comment ends just like
88 // after a newline and scan first token.
89 has_line_terminator_before_next_ = true;
90 SkipWhiteSpace();
91 Scan();
92 }
93
94
95 // Ensure that tokens can be stored in a byte.
96 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
97
98 // Table of one-character tokens, by character (0x00..0x7f only).
99 static const byte one_char_tokens[] = {
100 Token::ILLEGAL,
101 Token::ILLEGAL,
102 Token::ILLEGAL,
103 Token::ILLEGAL,
104 Token::ILLEGAL,
105 Token::ILLEGAL,
106 Token::ILLEGAL,
107 Token::ILLEGAL,
108 Token::ILLEGAL,
109 Token::ILLEGAL,
110 Token::ILLEGAL,
111 Token::ILLEGAL,
112 Token::ILLEGAL,
113 Token::ILLEGAL,
114 Token::ILLEGAL,
115 Token::ILLEGAL,
116 Token::ILLEGAL,
117 Token::ILLEGAL,
118 Token::ILLEGAL,
119 Token::ILLEGAL,
120 Token::ILLEGAL,
121 Token::ILLEGAL,
122 Token::ILLEGAL,
123 Token::ILLEGAL,
124 Token::ILLEGAL,
125 Token::ILLEGAL,
126 Token::ILLEGAL,
127 Token::ILLEGAL,
128 Token::ILLEGAL,
129 Token::ILLEGAL,
130 Token::ILLEGAL,
131 Token::ILLEGAL,
132 Token::ILLEGAL,
133 Token::ILLEGAL,
134 Token::ILLEGAL,
135 Token::ILLEGAL,
136 Token::ILLEGAL,
137 Token::ILLEGAL,
138 Token::ILLEGAL,
139 Token::ILLEGAL,
140 Token::LPAREN, // 0x28
141 Token::RPAREN, // 0x29
142 Token::ILLEGAL,
143 Token::ILLEGAL,
144 Token::COMMA, // 0x2c
145 Token::ILLEGAL,
146 Token::ILLEGAL,
147 Token::ILLEGAL,
148 Token::ILLEGAL,
149 Token::ILLEGAL,
150 Token::ILLEGAL,
151 Token::ILLEGAL,
152 Token::ILLEGAL,
153 Token::ILLEGAL,
154 Token::ILLEGAL,
155 Token::ILLEGAL,
156 Token::ILLEGAL,
157 Token::ILLEGAL,
158 Token::COLON, // 0x3a
159 Token::SEMICOLON, // 0x3b
160 Token::ILLEGAL,
161 Token::ILLEGAL,
162 Token::ILLEGAL,
163 Token::CONDITIONAL, // 0x3f
164 Token::ILLEGAL,
165 Token::ILLEGAL,
166 Token::ILLEGAL,
167 Token::ILLEGAL,
168 Token::ILLEGAL,
169 Token::ILLEGAL,
170 Token::ILLEGAL,
171 Token::ILLEGAL,
172 Token::ILLEGAL,
173 Token::ILLEGAL,
174 Token::ILLEGAL,
175 Token::ILLEGAL,
176 Token::ILLEGAL,
177 Token::ILLEGAL,
178 Token::ILLEGAL,
179 Token::ILLEGAL,
180 Token::ILLEGAL,
181 Token::ILLEGAL,
182 Token::ILLEGAL,
183 Token::ILLEGAL,
184 Token::ILLEGAL,
185 Token::ILLEGAL,
186 Token::ILLEGAL,
187 Token::ILLEGAL,
188 Token::ILLEGAL,
189 Token::ILLEGAL,
190 Token::ILLEGAL,
191 Token::LBRACK, // 0x5b
192 Token::ILLEGAL,
193 Token::RBRACK, // 0x5d
194 Token::ILLEGAL,
195 Token::ILLEGAL,
196 Token::ILLEGAL,
197 Token::ILLEGAL,
198 Token::ILLEGAL,
199 Token::ILLEGAL,
200 Token::ILLEGAL,
201 Token::ILLEGAL,
202 Token::ILLEGAL,
203 Token::ILLEGAL,
204 Token::ILLEGAL,
205 Token::ILLEGAL,
206 Token::ILLEGAL,
207 Token::ILLEGAL,
208 Token::ILLEGAL,
209 Token::ILLEGAL,
210 Token::ILLEGAL,
211 Token::ILLEGAL,
212 Token::ILLEGAL,
213 Token::ILLEGAL,
214 Token::ILLEGAL,
215 Token::ILLEGAL,
216 Token::ILLEGAL,
217 Token::ILLEGAL,
218 Token::ILLEGAL,
219 Token::ILLEGAL,
220 Token::ILLEGAL,
221 Token::ILLEGAL,
222 Token::ILLEGAL,
223 Token::LBRACE, // 0x7b
224 Token::ILLEGAL,
225 Token::RBRACE, // 0x7d
226 Token::BIT_NOT, // 0x7e
227 Token::ILLEGAL
228 };
229
230
231 Token::Value JavaScriptScanner::Next() {
232 current_ = next_;
233 has_line_terminator_before_next_ = false;
234 has_multiline_comment_before_next_ = false;
235 if (static_cast<unsigned>(c0_) <= 0x7f) {
236 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
237 if (token != Token::ILLEGAL) {
238 int pos = source_pos();
239 next_.token = token;
240 next_.location.beg_pos = pos;
241 next_.location.end_pos = pos + 1;
242 Advance();
243 return current_.token;
244 }
245 }
246 Scan();
247 return current_.token;
248 }
249
250
251 static inline bool IsByteOrderMark(uc32 c) {
252 // The Unicode value U+FFFE is guaranteed never to be assigned as a
253 // Unicode character; this implies that in a Unicode context the
254 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
255 // character expressed in little-endian byte order (since it could
256 // not be a U+FFFE character expressed in big-endian byte
257 // order). Nevertheless, we check for it to be compatible with
258 // Spidermonkey.
259 return c == 0xFEFF || c == 0xFFFE;
260 }
261
262
263 bool JavaScriptScanner::SkipWhiteSpace() {
264 int start_position = source_pos();
265
266 while (true) {
267 // We treat byte-order marks (BOMs) as whitespace for better
268 // compatibility with Spidermonkey and other JavaScript engines.
269 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) {
270 // IsWhiteSpace() includes line terminators!
271 if (unicode_cache_->IsLineTerminator(c0_)) {
272 // Ignore line terminators, but remember them. This is necessary
273 // for automatic semicolon insertion.
274 has_line_terminator_before_next_ = true;
275 }
276 Advance();
277 }
278
279 // If there is an HTML comment end '-->' at the beginning of a
280 // line (with only whitespace in front of it), we treat the rest
281 // of the line as a comment. This is in line with the way
282 // SpiderMonkey handles it.
283 if (c0_ == '-' && has_line_terminator_before_next_) {
284 Advance();
285 if (c0_ == '-') {
286 Advance();
287 if (c0_ == '>') {
288 // Treat the rest of the line as a comment.
289 SkipSingleLineComment();
290 // Continue skipping white space after the comment.
291 continue;
292 }
293 PushBack('-'); // undo Advance()
294 }
295 PushBack('-'); // undo Advance()
296 }
297 // Return whether or not we skipped any characters.
298 return source_pos() != start_position;
299 }
300 }
301
302
303 Token::Value JavaScriptScanner::SkipSingleLineComment() {
304 Advance();
305
306 // The line terminator at the end of the line is not considered
307 // to be part of the single-line comment; it is recognized
308 // separately by the lexical grammar and becomes part of the
309 // stream of input elements for the syntactic grammar (see
310 // ECMA-262, section 7.4).
311 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {
312 Advance();
313 }
314
315 return Token::WHITESPACE;
316 }
317
318
319 Token::Value JavaScriptScanner::SkipMultiLineComment() {
320 ASSERT(c0_ == '*');
321 Advance();
322
323 while (c0_ >= 0) {
324 uc32 ch = c0_;
325 Advance();
326 if (unicode_cache_->IsLineTerminator(ch)) {
327 // Following ECMA-262, section 7.4, a comment containing
328 // a newline will make the comment count as a line-terminator.
329 has_multiline_comment_before_next_ = true;
330 }
331 // If we have reached the end of the multi-line comment, we
332 // consume the '/' and insert a whitespace. This way all
333 // multi-line comments are treated as whitespace.
334 if (ch == '*' && c0_ == '/') {
335 c0_ = ' ';
336 return Token::WHITESPACE;
337 }
338 }
339
340 // Unterminated multi-line comment.
341 return Token::ILLEGAL;
342 }
343
344
345 Token::Value JavaScriptScanner::ScanHtmlComment() {
346 // Check for <!-- comments.
347 ASSERT(c0_ == '!');
348 Advance();
349 if (c0_ == '-') {
350 Advance();
351 if (c0_ == '-') return SkipSingleLineComment();
352 PushBack('-'); // undo Advance()
353 }
354 PushBack('!'); // undo Advance()
355 ASSERT(c0_ == '!');
356 return Token::LT;
357 }
358
359
360 void JavaScriptScanner::Scan() {
361 next_.literal_chars = NULL;
362 Token::Value token;
363 do {
364 // Remember the position of the next token
365 next_.location.beg_pos = source_pos();
366
367 switch (c0_) {
368 case ' ':
369 case '\t':
370 Advance();
371 token = Token::WHITESPACE;
372 break;
373
374 case '\n':
375 Advance();
376 has_line_terminator_before_next_ = true;
377 token = Token::WHITESPACE;
378 break;
379
380 case '"': case '\'':
381 token = ScanString();
382 break;
383
384 case '<':
385 // < <= << <<= <!--
386 Advance();
387 if (c0_ == '=') {
388 token = Select(Token::LTE);
389 } else if (c0_ == '<') {
390 token = Select('=', Token::ASSIGN_SHL, Token::SHL);
391 } else if (c0_ == '!') {
392 token = ScanHtmlComment();
393 } else {
394 token = Token::LT;
395 }
396 break;
397
398 case '>':
399 // > >= >> >>= >>> >>>=
400 Advance();
401 if (c0_ == '=') {
402 token = Select(Token::GTE);
403 } else if (c0_ == '>') {
404 // >> >>= >>> >>>=
405 Advance();
406 if (c0_ == '=') {
407 token = Select(Token::ASSIGN_SAR);
408 } else if (c0_ == '>') {
409 token = Select('=', Token::ASSIGN_SHR, Token::SHR);
410 } else {
411 token = Token::SAR;
412 }
413 } else {
414 token = Token::GT;
415 }
416 break;
417
418 case '=':
419 // = == ===
420 Advance();
421 if (c0_ == '=') {
422 token = Select('=', Token::EQ_STRICT, Token::EQ);
423 } else {
424 token = Token::ASSIGN;
425 }
426 break;
427
428 case '!':
429 // ! != !==
430 Advance();
431 if (c0_ == '=') {
432 token = Select('=', Token::NE_STRICT, Token::NE);
433 } else {
434 token = Token::NOT;
435 }
436 break;
437
438 case '+':
439 // + ++ +=
440 Advance();
441 if (c0_ == '+') {
442 token = Select(Token::INC);
443 } else if (c0_ == '=') {
444 token = Select(Token::ASSIGN_ADD);
445 } else {
446 token = Token::ADD;
447 }
448 break;
449
450 case '-':
451 // - -- --> -=
452 Advance();
453 if (c0_ == '-') {
454 Advance();
455 if (c0_ == '>' && has_line_terminator_before_next_) {
456 // For compatibility with SpiderMonkey, we skip lines that
457 // start with an HTML comment end '-->'.
458 token = SkipSingleLineComment();
459 } else {
460 token = Token::DEC;
461 }
462 } else if (c0_ == '=') {
463 token = Select(Token::ASSIGN_SUB);
464 } else {
465 token = Token::SUB;
466 }
467 break;
468
469 case '*':
470 // * *=
471 token = Select('=', Token::ASSIGN_MUL, Token::MUL);
472 break;
473
474 case '%':
475 // % %=
476 token = Select('=', Token::ASSIGN_MOD, Token::MOD);
477 break;
478
479 case '/':
480 // / // /* /=
481 Advance();
482 if (c0_ == '/') {
483 token = SkipSingleLineComment();
484 } else if (c0_ == '*') {
485 token = SkipMultiLineComment();
486 } else if (c0_ == '=') {
487 token = Select(Token::ASSIGN_DIV);
488 } else {
489 token = Token::DIV;
490 }
491 break;
492
493 case '&':
494 // & && &=
495 Advance();
496 if (c0_ == '&') {
497 token = Select(Token::AND);
498 } else if (c0_ == '=') {
499 token = Select(Token::ASSIGN_BIT_AND);
500 } else {
501 token = Token::BIT_AND;
502 }
503 break;
504
505 case '|':
506 // | || |=
507 Advance();
508 if (c0_ == '|') {
509 token = Select(Token::OR);
510 } else if (c0_ == '=') {
511 token = Select(Token::ASSIGN_BIT_OR);
512 } else {
513 token = Token::BIT_OR;
514 }
515 break;
516
517 case '^':
518 // ^ ^=
519 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
520 break;
521
522 case '.':
523 // . Number
524 Advance();
525 if (IsDecimalDigit(c0_)) {
526 token = ScanNumber(true);
527 } else {
528 token = Token::PERIOD;
529 }
530 break;
531
532 case ':':
533 token = Select(Token::COLON);
534 break;
535
536 case ';':
537 token = Select(Token::SEMICOLON);
538 break;
539
540 case ',':
541 token = Select(Token::COMMA);
542 break;
543
544 case '(':
545 token = Select(Token::LPAREN);
546 break;
547
548 case ')':
549 token = Select(Token::RPAREN);
550 break;
551
552 case '[':
553 token = Select(Token::LBRACK);
554 break;
555
556 case ']':
557 token = Select(Token::RBRACK);
558 break;
559
560 case '{':
561 token = Select(Token::LBRACE);
562 break;
563
564 case '}':
565 token = Select(Token::RBRACE);
566 break;
567
568 case '?':
569 token = Select(Token::CONDITIONAL);
570 break;
571
572 case '~':
573 token = Select(Token::BIT_NOT);
574 break;
575
576 default:
577 if (unicode_cache_->IsIdentifierStart(c0_)) {
578 token = ScanIdentifierOrKeyword();
579 } else if (IsDecimalDigit(c0_)) {
580 token = ScanNumber(false);
581 } else if (SkipWhiteSpace()) {
582 token = Token::WHITESPACE;
583 } else if (c0_ < 0) {
584 token = Token::EOS;
585 } else {
586 token = Select(Token::ILLEGAL);
587 }
588 break;
589 }
590
591 // Continue scanning for tokens as long as we're just skipping
592 // whitespace.
593 } while (token == Token::WHITESPACE);
594
595 next_.location.end_pos = source_pos();
596 next_.token = token;
597 }
598
599
600 void JavaScriptScanner::SeekForward(int pos) {
601 // After this call, we will have the token at the given position as
602 // the "next" token. The "current" token will be invalid.
603 if (pos == next_.location.beg_pos) return;
604 int current_pos = source_pos();
605 ASSERT_EQ(next_.location.end_pos, current_pos);
606 // Positions inside the lookahead token aren't supported.
607 ASSERT(pos >= current_pos);
608 if (pos != current_pos) {
609 source_->SeekForward(pos - source_->pos());
610 Advance();
611 // This function is only called to seek to the location
612 // of the end of a function (at the "}" token). It doesn't matter
613 // whether there was a line terminator in the part we skip.
614 has_line_terminator_before_next_ = false;
615 has_multiline_comment_before_next_ = false;
616 }
617 Scan();
618 }
619
620
621 void JavaScriptScanner::ScanEscape() {
622 uc32 c = c0_;
623 Advance();
624
625 // Skip escaped newlines.
626 if (unicode_cache_->IsLineTerminator(c)) {
627 // Allow CR+LF newlines in multiline string literals.
628 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
629 // Allow LF+CR newlines in multiline string literals.
630 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
54 return; 631 return;
55 } 632 }
56 if (pushback_limit_ == NULL && buffer_cursor_ > buffer_) { 633
57 // buffer_ is writable, buffer_cursor_ is const pointer. 634 switch (c) {
58 buffer_[--buffer_cursor_ - buffer_] = static_cast<uc16>(character); 635 case '\'': // fall through
59 pos_--; 636 case '"' : // fall through
60 return; 637 case '\\': break;
61 } 638 case 'b' : c = '\b'; break;
62 SlowPushBack(static_cast<uc16>(character)); 639 case 'f' : c = '\f'; break;
63 } 640 case 'n' : c = '\n'; break;
64 641 case 'r' : c = '\r'; break;
65 642 case 't' : c = '\t'; break;
66 void BufferedUC16CharacterStream::SlowPushBack(uc16 character) { 643 case 'u' : {
67 // In pushback mode, the end of the buffer contains pushback, 644 c = ScanHexNumber(4);
68 // and the start of the buffer (from buffer start to pushback_limit_) 645 if (c < 0) c = 'u';
69 // contains valid data that comes just after the pushback. 646 break;
70 // We NULL the pushback_limit_ if pushing all the way back to the 647 }
71 // start of the buffer. 648 case 'v' : c = '\v'; break;
72 649 case 'x' : {
73 if (pushback_limit_ == NULL) { 650 c = ScanHexNumber(2);
74 // Enter pushback mode. 651 if (c < 0) c = 'x';
75 pushback_limit_ = buffer_end_; 652 break;
76 buffer_end_ = buffer_ + kBufferSize; 653 }
77 buffer_cursor_ = buffer_end_; 654 case '0' : // fall through
78 } 655 case '1' : // fall through
79 // Ensure that there is room for at least one pushback. 656 case '2' : // fall through
80 ASSERT(buffer_cursor_ > buffer_); 657 case '3' : // fall through
81 ASSERT(pos_ > 0); 658 case '4' : // fall through
82 buffer_[--buffer_cursor_ - buffer_] = character; 659 case '5' : // fall through
83 if (buffer_cursor_ == buffer_) { 660 case '6' : // fall through
84 pushback_limit_ = NULL; 661 case '7' : c = ScanOctalEscape(c, 2); break;
85 } else if (buffer_cursor_ < pushback_limit_) { 662 }
86 pushback_limit_ = buffer_cursor_; 663
87 } 664 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
88 pos_--; 665 // should be illegal, but they are commonly handled
89 } 666 // as non-escaped characters by JS VMs.
90 667 AddLiteralChar(c);
91 668 }
92 bool BufferedUC16CharacterStream::ReadBlock() { 669
93 buffer_cursor_ = buffer_; 670
94 if (pushback_limit_ != NULL) { 671 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
95 // Leave pushback mode. 672 // ECMA-262. Other JS VMs support them.
96 buffer_end_ = pushback_limit_; 673 uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) {
97 pushback_limit_ = NULL; 674 uc32 x = c - '0';
98 // If there were any valid characters left at the 675 int i = 0;
99 // start of the buffer, use those. 676 for (; i < length; i++) {
100 if (buffer_cursor_ < buffer_end_) return true; 677 int d = c0_ - '0';
101 // Otherwise read a new block. 678 if (d < 0 || d > 7) break;
102 } 679 int nx = x * 8 + d;
103 unsigned length = FillBuffer(pos_, kBufferSize); 680 if (nx >= 256) break;
104 buffer_end_ = buffer_ + length; 681 x = nx;
105 return length > 0; 682 Advance();
106 } 683 }
107 684 // Anything except '\0' is an octal escape sequence, illegal in strict mode.
108 685 // Remember the position of octal escape sequences so that an error
109 unsigned BufferedUC16CharacterStream::SlowSeekForward(unsigned delta) { 686 // can be reported later (in strict mode).
110 // Leave pushback mode (i.e., ignore that there might be valid data 687 // We don't report the error immediately, because the octal escape can
111 // in the buffer before the pushback_limit_ point). 688 // occur before the "use strict" directive.
112 pushback_limit_ = NULL; 689 if (c != '0' || i > 0) {
113 return BufferSeekForward(delta); 690 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
114 } 691 }
692 return x;
693 }
694
695
696 Token::Value JavaScriptScanner::ScanString() {
697 uc32 quote = c0_;
698 Advance(); // consume quote
699
700 LiteralScope literal(this);
701 while (c0_ != quote && c0_ >= 0
702 && !unicode_cache_->IsLineTerminator(c0_)) {
703 uc32 c = c0_;
704 Advance();
705 if (c == '\\') {
706 if (c0_ < 0) return Token::ILLEGAL;
707 ScanEscape();
708 } else {
709 AddLiteralChar(c);
710 }
711 }
712 if (c0_ != quote) return Token::ILLEGAL;
713 literal.Complete();
714
715 Advance(); // consume quote
716 return Token::STRING;
717 }
718
719
720 void JavaScriptScanner::ScanDecimalDigits() {
721 while (IsDecimalDigit(c0_))
722 AddLiteralCharAdvance();
723 }
724
725
726 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
727 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
728
729 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
730
731 LiteralScope literal(this);
732 if (seen_period) {
733 // we have already seen a decimal point of the float
734 AddLiteralChar('.');
735 ScanDecimalDigits(); // we know we have at least one digit
736
737 } else {
738 // if the first character is '0' we must check for octals and hex
739 if (c0_ == '0') {
740 int start_pos = source_pos(); // For reporting octal positions.
741 AddLiteralCharAdvance();
742
743 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
744 if (c0_ == 'x' || c0_ == 'X') {
745 // hex number
746 kind = HEX;
747 AddLiteralCharAdvance();
748 if (!IsHexDigit(c0_)) {
749 // we must have at least one hex digit after 'x'/'X'
750 return Token::ILLEGAL;
751 }
752 while (IsHexDigit(c0_)) {
753 AddLiteralCharAdvance();
754 }
755 } else if ('0' <= c0_ && c0_ <= '7') {
756 // (possible) octal number
757 kind = OCTAL;
758 while (true) {
759 if (c0_ == '8' || c0_ == '9') {
760 kind = DECIMAL;
761 break;
762 }
763 if (c0_ < '0' || '7' < c0_) {
764 // Octal literal finished.
765 octal_pos_ = Location(start_pos, source_pos());
766 break;
767 }
768 AddLiteralCharAdvance();
769 }
770 }
771 }
772
773 // Parse decimal digits and allow trailing fractional part.
774 if (kind == DECIMAL) {
775 ScanDecimalDigits(); // optional
776 if (c0_ == '.') {
777 AddLiteralCharAdvance();
778 ScanDecimalDigits(); // optional
779 }
780 }
781 }
782
783 // scan exponent, if any
784 if (c0_ == 'e' || c0_ == 'E') {
785 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
786 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
787 // scan exponent
788 AddLiteralCharAdvance();
789 if (c0_ == '+' || c0_ == '-')
790 AddLiteralCharAdvance();
791 if (!IsDecimalDigit(c0_)) {
792 // we must have at least one decimal digit after 'e'/'E'
793 return Token::ILLEGAL;
794 }
795 ScanDecimalDigits();
796 }
797
798 // The source character immediately following a numeric literal must
799 // not be an identifier start or a decimal digit; see ECMA-262
800 // section 7.8.3, page 17 (note that we read only one decimal digit
801 // if the value is 0).
802 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_))
803 return Token::ILLEGAL;
804
805 literal.Complete();
806
807 return Token::NUMBER;
808 }
809
810
811 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
812 Advance();
813 if (c0_ != 'u') return -1;
814 Advance();
815 uc32 result = ScanHexNumber(4);
816 if (result < 0) PushBack('u');
817 return result;
818 }
819
115 820
116 // ---------------------------------------------------------------------------- 821 // ----------------------------------------------------------------------------
117 // GenericStringUC16CharacterStream 822 // Keyword Matcher
118 823
119 824 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
120 GenericStringUC16CharacterStream::GenericStringUC16CharacterStream( 825 KEYWORD_GROUP('b') \
121 Handle<String> data, 826 KEYWORD("break", Token::BREAK) \
122 unsigned start_position, 827 KEYWORD_GROUP('c') \
123 unsigned end_position) 828 KEYWORD("case", Token::CASE) \
124 : string_(data), 829 KEYWORD("catch", Token::CATCH) \
125 length_(end_position) { 830 KEYWORD("class", Token::FUTURE_RESERVED_WORD) \
126 ASSERT(end_position >= start_position); 831 KEYWORD("const", Token::CONST) \
127 buffer_cursor_ = buffer_; 832 KEYWORD("continue", Token::CONTINUE) \
128 buffer_end_ = buffer_; 833 KEYWORD_GROUP('d') \
129 pos_ = start_position; 834 KEYWORD("debugger", Token::DEBUGGER) \
130 } 835 KEYWORD("default", Token::DEFAULT) \
131 836 KEYWORD("delete", Token::DELETE) \
132 837 KEYWORD("do", Token::DO) \
133 GenericStringUC16CharacterStream::~GenericStringUC16CharacterStream() { } 838 KEYWORD_GROUP('e') \
134 839 KEYWORD("else", Token::ELSE) \
135 840 KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \
136 unsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) { 841 KEYWORD("export", Token::FUTURE_RESERVED_WORD) \
137 unsigned old_pos = pos_; 842 KEYWORD("extends", Token::FUTURE_RESERVED_WORD) \
138 pos_ = Min(pos_ + delta, length_); 843 KEYWORD_GROUP('f') \
139 ReadBlock(); 844 KEYWORD("false", Token::FALSE_LITERAL) \
140 return pos_ - old_pos; 845 KEYWORD("finally", Token::FINALLY) \
141 } 846 KEYWORD("for", Token::FOR) \
142 847 KEYWORD("function", Token::FUNCTION) \
143 848 KEYWORD_GROUP('i') \
144 unsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos, 849 KEYWORD("if", Token::IF) \
145 unsigned length) { 850 KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
146 if (from_pos >= length_) return 0; 851 KEYWORD("import", Token::FUTURE_RESERVED_WORD) \
147 if (from_pos + length > length_) { 852 KEYWORD("in", Token::IN) \
148 length = length_ - from_pos; 853 KEYWORD("instanceof", Token::INSTANCEOF) \
149 } 854 KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
150 String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + length); 855 KEYWORD_GROUP('l') \
151 return length; 856 KEYWORD("let", harmony_block_scoping \
152 } 857 ? Token::LET : Token::FUTURE_STRICT_RESERVED_WORD) \
153 858 KEYWORD_GROUP('n') \
154 859 KEYWORD("new", Token::NEW) \
155 // ---------------------------------------------------------------------------- 860 KEYWORD("null", Token::NULL_LITERAL) \
156 // Utf8ToUC16CharacterStream 861 KEYWORD_GROUP('p') \
157 Utf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data, 862 KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
158 unsigned length) 863 KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
159 : BufferedUC16CharacterStream(), 864 KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
160 raw_data_(data), 865 KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
161 raw_data_length_(length), 866 KEYWORD_GROUP('r') \
162 raw_data_pos_(0), 867 KEYWORD("return", Token::RETURN) \
163 raw_character_position_(0) { 868 KEYWORD_GROUP('s') \
164 ReadBlock(); 869 KEYWORD("static", Token::FUTURE_STRICT_RESERVED_WORD) \
165 } 870 KEYWORD("super", Token::FUTURE_RESERVED_WORD) \
166 871 KEYWORD("switch", Token::SWITCH) \
167 872 KEYWORD_GROUP('t') \
168 Utf8ToUC16CharacterStream::~Utf8ToUC16CharacterStream() { } 873 KEYWORD("this", Token::THIS) \
169 874 KEYWORD("throw", Token::THROW) \
170 875 KEYWORD("true", Token::TRUE_LITERAL) \
171 unsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) { 876 KEYWORD("try", Token::TRY) \
172 unsigned old_pos = pos_; 877 KEYWORD("typeof", Token::TYPEOF) \
173 unsigned target_pos = pos_ + delta; 878 KEYWORD_GROUP('v') \
174 SetRawPosition(target_pos); 879 KEYWORD("var", Token::VAR) \
175 pos_ = raw_character_position_; 880 KEYWORD("void", Token::VOID) \
176 ReadBlock(); 881 KEYWORD_GROUP('w') \
177 return pos_ - old_pos; 882 KEYWORD("while", Token::WHILE) \
178 } 883 KEYWORD("with", Token::WITH) \
179 884 KEYWORD_GROUP('y') \
180 885 KEYWORD("yield", Token::FUTURE_STRICT_RESERVED_WORD)
181 unsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position, 886
182 unsigned length) { 887
183 static const unibrow::uchar kMaxUC16Character = 0xffff; 888 static Token::Value KeywordOrIdentifierToken(const char* input,
184 SetRawPosition(char_position); 889 int input_length,
185 if (raw_character_position_ != char_position) { 890 bool harmony_block_scoping) {
186 // char_position was not a valid position in the stream (hit the end 891 ASSERT(input_length >= 1);
187 // while spooling to it). 892 const int kMinLength = 2;
188 return 0u; 893 const int kMaxLength = 10;
189 } 894 if (input_length < kMinLength || input_length > kMaxLength) {
190 unsigned i = 0; 895 return Token::IDENTIFIER;
191 while (i < length) { 896 }
192 if (raw_data_pos_ == raw_data_length_) break; 897 switch (input[0]) {
193 unibrow::uchar c = raw_data_[raw_data_pos_]; 898 default:
194 if (c <= unibrow::Utf8::kMaxOneByteChar) { 899 #define KEYWORD_GROUP_CASE(ch) \
195 raw_data_pos_++; 900 break; \
901 case ch:
902 #define KEYWORD(keyword, token) \
903 { \
904 /* 'keyword' is a char array, so sizeof(keyword) is */ \
905 /* strlen(keyword) plus 1 for the NUL char. */ \
906 const int keyword_length = sizeof(keyword) - 1; \
907 STATIC_ASSERT(keyword_length >= kMinLength); \
908 STATIC_ASSERT(keyword_length <= kMaxLength); \
909 if (input_length == keyword_length && \
910 input[1] == keyword[1] && \
911 (keyword_length <= 2 || input[2] == keyword[2]) && \
912 (keyword_length <= 3 || input[3] == keyword[3]) && \
913 (keyword_length <= 4 || input[4] == keyword[4]) && \
914 (keyword_length <= 5 || input[5] == keyword[5]) && \
915 (keyword_length <= 6 || input[6] == keyword[6]) && \
916 (keyword_length <= 7 || input[7] == keyword[7]) && \
917 (keyword_length <= 8 || input[8] == keyword[8]) && \
918 (keyword_length <= 9 || input[9] == keyword[9])) { \
919 return token; \
920 } \
921 }
922 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
923 }
924 return Token::IDENTIFIER;
925 }
926
927
// Scans an identifier or keyword whose first character is in c0_ and returns
// the resulting token.
//
// - If the first character is a backslash, a unicode escape is decoded; the
//   result is rejected with Token::ILLEGAL when it is negative, is itself a
//   backslash, or is not an identifier start. Otherwise scanning continues
//   in ScanIdentifierSuffix().
// - Otherwise identifier-part characters are appended to the literal until a
//   non-identifier character is reached. A backslash encountered on the way
//   hands the rest of the scan to ScanIdentifierSuffix().
// - When the scan finishes without escapes and the collected literal is
//   ASCII, the token is whatever KeywordOrIdentifierToken() reports for it;
//   in every other case Token::IDENTIFIER is returned.
928 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
929 ASSERT(unicode_cache_->IsIdentifierStart(c0_));
930 LiteralScope literal(this);
931 // Scan identifier start character.
932 if (c0_ == '\\') {
933 uc32 c = ScanIdentifierUnicodeEscape();
934 // Only allow legal identifier start characters.
935 if (c < 0 ||
936 c == '\\' || // No recursive escapes.
937 !unicode_cache_->IsIdentifierStart(c)) {
938 return Token::ILLEGAL;
939 }
940 AddLiteralChar(c);
941 return ScanIdentifierSuffix(&literal);
942 }
943
944 uc32 first_char = c0_;
945 Advance();
946 AddLiteralChar(first_char);
947
948 // Scan the rest of the identifier characters.
949 while (unicode_cache_->IsIdentifierPart(c0_)) {
950 if (c0_ != '\\') {
951 uc32 next_char = c0_;
952 Advance();
953 AddLiteralChar(next_char);
954 continue;
955 }
956 // Fallthrough if no longer able to complete keyword.
957 return ScanIdentifierSuffix(&literal);
958 }
959
960 literal.Complete();
961
962 if (next_.literal_chars->is_ascii()) {
963 Vector<const char> chars = next_.literal_chars->ascii_literal();
964 return KeywordOrIdentifierToken(chars.start(),
965 chars.length(),
966 harmony_block_scoping_);
967 }
968
969 return Token::IDENTIFIER;
970 }
971
972
973 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {
974 // Scan the rest of the identifier characters.
975 while (unicode_cache_->IsIdentifierPart(c0_)) {
976 if (c0_ == '\\') {
977 uc32 c = ScanIdentifierUnicodeEscape();
978 // Only allow legal identifier part characters.
979 if (c < 0 ||
980 c == '\\' ||
981 !unicode_cache_->IsIdentifierPart(c)) {
982 return Token::ILLEGAL;
983 }
984 AddLiteralChar(c);
196 } else { 985 } else {
197 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_, 986 AddLiteralChar(c0_);
198 raw_data_length_ - raw_data_pos_, 987 Advance();
199 &raw_data_pos_); 988 }
200 // Don't allow characters outside of the BMP. 989 }
201 if (c > kMaxUC16Character) { 990 literal->Complete();
202 c = unibrow::Utf8::kBadChar; 991
992 return Token::IDENTIFIER;
993 }
994
995
// Scans the body of a regular expression literal into the current literal
// buffer, without interpreting it.
//
// seen_equal: true when the token already consumed was '/=' rather than '/',
//   in which case the '=' belongs to the pattern and is added to the literal
//   first.
//
// Returns false if a line terminator or a negative c0_ is reached before the
// closing '/' (the partially collected literal is then dropped by the
// LiteralScope destructor); returns true after the closing '/' has been
// consumed. A '/' inside a character class ('[' ... ']') does not end the
// pattern.
996 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
997 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
998 bool in_character_class = false;
999
1000 // Previous token is either '/' or '/=', in the second case, the
1001 // pattern starts at =.
1002 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1003 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1004
1005 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1006 // the scanner should pass uninterpreted bodies to the RegExp
1007 // constructor.
1008 LiteralScope literal(this);
1009 if (seen_equal) {
1010 AddLiteralChar('=');
1011 }
1012
1013 while (c0_ != '/' || in_character_class) {
1014 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false;
1015 if (c0_ == '\\') { // Escape sequence.
1016 AddLiteralCharAdvance();
1017 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false;
1018 AddLiteralCharAdvance();
1019 // If the escape allows more characters, i.e., \x??, \u????, or \c?,
1020 // only "safe" characters are allowed (letters, digits, underscore),
1021 // otherwise the escape isn't valid and the invalid character has
1022 // its normal meaning. I.e., we can just continue scanning without
1023 // worrying whether the following characters are part of the escape
1024 // or not, since any '/', '\\' or '[' is guaranteed to not be part
1025 // of the escape sequence.
1026
1027 // TODO(896): At some point, parse RegExps more thoroughly to capture
1028 // octal escapes in strict mode.
1029 } else { // Unescaped character.
1030 if (c0_ == '[') in_character_class = true;
1031 if (c0_ == ']') in_character_class = false;
1032 AddLiteralCharAdvance();
1033 }
1034 }
1035 Advance(); // consume '/'
1036
1037 literal.Complete();
1038
1039 return true;
1040 }
1041
1042
// Tries to read a \uXXXX escape (backslash, 'u', four hex digits) starting at
// the backslash in c0_ and, on success, copies the six characters verbatim
// (not decoded) into the current literal buffer.
//
// Returns true when all six characters were read. Otherwise the characters
// recorded so far are pushed back in reverse order and false is returned.
//
// NOTE(review): on success no Advance() follows the fourth hex digit, so c0_
// still holds that digit when this returns - confirm callers expect this.
1043 bool JavaScriptScanner::ScanLiteralUnicodeEscape() {
1044 ASSERT(c0_ == '\\');
1045 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};
1046 Advance();
1047 int i = 1;
1048 if (c0_ == 'u') {
1049 i++;
1050 while (i < 6) {
1051 Advance();
1052 if (!IsHexDigit(c0_)) break;
1053 chars_read[i] = c0_;
1054 i++;
1055 }
1056 }
1057 if (i < 6) {
1058 // Incomplete escape. Undo all advances and return false.
1059 while (i > 0) {
1060 i--;
1061 PushBack(chars_read[i]);
1062 }
1063 return false;
1064 }
1065 // Complete escape. Add all chars to current literal buffer.
1066 for (int i = 0; i < 6; i++) {
1067 AddLiteralChar(chars_read[i]);
1068 }
1069 return true;
1070 }
1071
1072
1073 bool JavaScriptScanner::ScanRegExpFlags() {
1074 // Scan regular expression flags.
1075 LiteralScope literal(this);
1076 while (unicode_cache_->IsIdentifierPart(c0_)) {
1077 if (c0_ != '\\') {
1078 AddLiteralCharAdvance();
1079 } else {
1080 if (!ScanLiteralUnicodeEscape()) {
1081 break;
203 } 1082 }
204 } 1083 }
205 buffer_[i++] = static_cast<uc16>(c); 1084 }
206 } 1085 literal.Complete();
207 raw_character_position_ = char_position + i; 1086
208 return i; 1087 next_.location.end_pos = source_pos() - 1;
209 } 1088 return true;
210
211
// Bit patterns used to classify bytes above the one-byte range by their top
// two bits: after masking with kUtf8MultiByteMask, 0xC0 (both bits set) is
// treated as the first byte of a multi-byte encoding and 0x80 as a follower
// byte.
212 static const byte kUtf8MultiByteMask = 0xC0;
213 static const byte kUtf8MultiByteCharStart = 0xC0;
214 static const byte kUtf8MultiByteCharFollower = 0x80;
215
216
// Returns true when the top two bits of first_byte are both set, i.e. the
// byte is the first byte of a multi-byte encoding. Compiled only when DEBUG
// is defined; within this file it is referenced only from ASSERTs.
217 #ifdef DEBUG
218 static bool IsUtf8MultiCharacterStart(byte first_byte) {
219 return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart;
220 }
221 #endif
222
223
// Returns true when the top two bits of later_byte are 10, i.e. the byte is a
// non-initial (follower) byte of a multi-byte encoding.
224 static bool IsUtf8MultiCharacterFollower(byte later_byte) {
225 return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower;
226 }
227
228
229 // Move the cursor back to point at the preceding UTF-8 character start
230 // in the buffer.
//
// buffer: the UTF-8 bytes; cursor: in/out byte index into buffer.
// *cursor is decremented before the first read, and again for every follower
// byte stepped over. No lower bound is checked here, so the caller must ensure
// that a complete character precedes *cursor.
231 static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {
232 byte character = buffer[--*cursor];
233 if (character > unibrow::Utf8::kMaxOneByteChar) {
234 ASSERT(IsUtf8MultiCharacterFollower(character));
235 // Last byte of a multi-byte character encoding. Step backwards until
236 // pointing to the first byte of the encoding, recognized by having the
237 // top two bits set.
238 while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { }
239 ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor]));
240 }
241 }
242
243
244 // Move the cursor forward to point at the next following UTF-8 character start
245 // in the buffer.
//
// buffer: the UTF-8 bytes; cursor: in/out byte index into buffer.
// The byte at *cursor is always consumed. If it is above the one-byte range,
// the number of follower bytes is derived from that byte alone and skipped
// without inspecting them and without any bounds check.
246 static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {
247 byte character = buffer[(*cursor)++];
248 if (character > unibrow::Utf8::kMaxOneByteChar) {
249 // First character of a multi-byte character encoding.
250 // The number of most-significant one-bits determines the length of the
251 // encoding:
252 // 110..... - (0xCx, 0xDx) one additional byte (minimum).
253 // 1110.... - (0xEx) two additional bytes.
254 // 11110... - (0xFx) three additional bytes (maximum).
255 ASSERT(IsUtf8MultiCharacterStart(character));
256 // Additional bytes is:
257 // 1 if value in range 0xC0 .. 0xDF.
258 // 2 if value in range 0xE0 .. 0xEF.
259 // 3 if value in range 0xF0 .. 0xF7.
260 // Encode that in a single value.
// 0x3211 is a table of four nibbles (low nibble first: 1, 1, 2, 3). The
// shift amount (0, 4, 8 or 12) is taken from bits 5..4 of the lead byte,
// which selects the nibble for 0xCx, 0xDx, 0xEx and 0xFx respectively.
261 unsigned additional_bytes =
262 ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;
263 *cursor += additional_bytes;
// NOTE(review): the check below indexes buffer at the fixed offset
// 1 + additional_bytes rather than at the advanced *cursor; the two only
// coincide when the cursor started at 0 - confirm the intended index.
264 ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));
265 }
266 }
267
268
// Moves the raw cursor (raw_data_pos_, a byte index, together with
// raw_character_position_, a character count) towards target_position one
// UTF-8 character at a time.
//
// When spooling forwards, the walk stops early if raw_data_pos_ reaches
// raw_data_length_; raw_character_position_ is then left smaller than
// target_position, which is how a caller can tell the target was not reached.
269 void Utf8ToUC16CharacterStream::SetRawPosition(unsigned target_position) {
270 if (raw_character_position_ > target_position) {
271 // Spool backwards in utf8 buffer.
272 do {
273 Utf8CharacterBack(raw_data_, &raw_data_pos_);
274 raw_character_position_--;
275 } while (raw_character_position_ > target_position);
276 return;
277 }
278 // Spool forwards in the utf8 buffer.
279 while (raw_character_position_ < target_position) {
280 if (raw_data_pos_ == raw_data_length_) return;
281 Utf8CharacterForward(raw_data_, &raw_data_pos_);
282 raw_character_position_++;
283 }
284 }
285
286
287 // ----------------------------------------------------------------------------
288 // ExternalTwoByteStringUC16CharacterStream
289
// Destructor; the body is empty.
290 ExternalTwoByteStringUC16CharacterStream::
291 ~ExternalTwoByteStringUC16CharacterStream() { }
292
293
// Sets up a stream over the characters [start_position, end_position) of
// data: raw_data_ comes from data->GetTwoByteData(start_position),
// buffer_cursor_ starts at raw_data_, buffer_end_ lies
// (end_position - start_position) elements past it, and pos_ starts at
// start_position. The handle itself is kept in source_.
//
// NOTE: the statement on gutter line 302 ends with a comma, not a semicolon,
// so it forms a single comma expression with the assignment on line 303; both
// assignments are still performed, in order.
294 ExternalTwoByteStringUC16CharacterStream
295 ::ExternalTwoByteStringUC16CharacterStream(
296 Handle<ExternalTwoByteString> data,
297 int start_position,
298 int end_position)
299 : UC16CharacterStream(),
300 source_(data),
301 raw_data_(data->GetTwoByteData(start_position)) {
302 buffer_cursor_ = raw_data_,
303 buffer_end_ = raw_data_ + (end_position - start_position);
304 pos_ = start_position;
305 }
306
307
308 // ----------------------------------------------------------------------------
309 // Scanner::LiteralScope
310
// Remembers the scanner, marks the scope as not yet complete, and calls
// StartLiteral() on the scanner.
311 Scanner::LiteralScope::LiteralScope(Scanner* self)
312 : scanner_(self), complete_(false) {
313 self->StartLiteral();
314 }
315
316
// Calls DropLiteral() on the scanner unless the scope was marked complete,
// so a scan that returns early discards its partial literal.
317 Scanner::LiteralScope::~LiteralScope() {
318 if (!complete_) scanner_->DropLiteral();
319 }
320
321
322 void Scanner::LiteralScope::Complete() {
323 scanner_->TerminateLiteral();
324 complete_ = true;
325 } 1089 }
326 1090
327 } } // namespace v8::internal 1091 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698