Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(629)

Side by Side Diff: src/lexer/lexer.re

Issue 32573003: Experimental parser: add UnicodeCache. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/lexer/lexer.h ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Portions of this code based on re2c: 1 // Portions of this code based on re2c:
2 // (re2c/examples/push.re) 2 // (re2c/examples/push.re)
3 // Copyright 2013 the V8 project authors. All rights reserved. 3 // Copyright 2013 the V8 project authors. All rights reserved.
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are 5 // modification, are permitted provided that the following conditions are
6 // met: 6 // met:
7 // 7 //
8 // * Redistributions of source code must retain the above copyright 8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer. 9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above 10 // * Redistributions in binary form must reproduce the above
(...skipping 13 matching lines...) Expand all
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 29
30 #include <stdio.h> 30 #include <stdio.h>
31 #include <stdlib.h> 31 #include <stdlib.h>
32 #include <string.h> 32 #include <string.h>
33 33
34 // FIXME: some of this is probably not needed.
35 #include "allocation.h"
36 #include "ast.h"
37 #include "preparse-data-format.h"
38 #include "preparse-data.h"
39 #include "scopes.h"
40 #include "preparser.h"
41 #include "api.h"
42 #include "ast.h"
43 #include "bootstrapper.h"
44 #include "char-predicates-inl.h"
45 #include "codegen.h"
46 #include "compiler.h"
47 #include "func-name-inferrer.h"
48 #include "messages.h"
49 #include "parser.h"
50 #include "platform.h"
51 #include "preparser.h"
52 #include "runtime.h"
53 #include "scanner-character-streams.h"
54 #include "scopeinfo.h"
55 #include "string-stream.h"
56
57
34 // TODO: 58 // TODO:
35 // - SpiderMonkey compatibility hack: " --> something" is treated 59 // - SpiderMonkey compatibility hack: " --> something" is treated
36 // as a single line comment. 60 // as a single line comment.
37 // - Run-time lexing modifications: harmony number literals, keywords depending 61 // - Run-time lexing modifications: harmony number literals, keywords depending
38 // on harmony_modules, harmony_scoping 62 // on harmony_modules, harmony_scoping
39 // - Escaping the string literals (like the baseline does) 63 // - Escaping the string literals (like the baseline does)
40 // - Error recovery after illegal tokens. 64 // - Error recovery after illegal tokens.
41 65
42 enum Condition { 66 enum Condition {
43 kConditionNormal, 67 kConditionNormal,
(...skipping 25 matching lines...) Expand all
69 #define O_BINARY 0 93 #define O_BINARY 0
70 #endif 94 #endif
71 95
72 #endif // defined(WIN32) 96 #endif // defined(WIN32)
73 97
74 #include "experimental-scanner.h" 98 #include "experimental-scanner.h"
75 #include "lexer.h" 99 #include "lexer.h"
76 100
77 using namespace v8::internal; 101 using namespace v8::internal;
78 102
103 namespace {
104
105 inline int HexValue(uc32 c) {
106 c -= '0';
107 if (static_cast<unsigned>(c) <= 9) return c;
108 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
109 if (static_cast<unsigned>(c) <= 5) return c + 10;
110 return -1;
111 }
112
113 }
114
79 #define PUSH_TOKEN(T) { send(T); SKIP(); } 115 #define PUSH_TOKEN(T) { send(T); SKIP(); }
80 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); } 116 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
81 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;} 117 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
82 #define PUSH_LINE_TERMINATOR() { SKIP(); } 118 #define PUSH_LINE_TERMINATOR() { SKIP(); }
83 #define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; } 119 #define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; }
84 120
85 PushScanner::PushScanner(ExperimentalScanner* sink) 121 #define YYCTYPE uint8_t
86 : eof_(false), 122
123 PushScanner::PushScanner(ExperimentalScanner* sink, UnicodeCache* unicode_cache)
124 : unicode_cache_(unicode_cache),
125 eof_(false),
87 state_(-1), 126 state_(-1),
88 condition_(kConditionNormal), 127 condition_(kConditionNormal),
89 limit_(NULL), 128 limit_(NULL),
90 start_(NULL), 129 start_(NULL),
91 cursor_(NULL), 130 cursor_(NULL),
92 marker_(NULL), 131 marker_(NULL),
93 real_start_(0), 132 real_start_(0),
94 buffer_(NULL), 133 buffer_(NULL),
95 buffer_end_(NULL), 134 buffer_end_(NULL),
96 yych(0), 135 yych(0),
97 yyaccept(0), 136 yyaccept(0),
98 sink_(sink) { 137 sink_(sink) {
99 138
100 } 139 }
101 140
102 PushScanner::~PushScanner() { 141 PushScanner::~PushScanner() {
103 } 142 }
104 143
144
145 uc32 PushScanner::ScanHexNumber(int length) {
146 // We have seen \uXXXX, let's see what it is.
147 // FIXME: we never end up in here if only a subset of the 4 chars are valid
148 // hex digits -> handle the case where they're not.
149 uc32 x = 0;
150 for (YYCTYPE* s = cursor_ - length; s != cursor_; ++s) {
151 int d = HexValue(*s);
152 if (d < 0) {
153 return -1;
154 }
155 x = x * 16 + d;
156 }
157 return x;
158 }
159
160
161 bool PushScanner::ValidIdentifierPart() {
162 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));
163 }
164
165 bool PushScanner::ValidIdentifierStart() {
166 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));
167 }
168
105 void PushScanner::send(Token::Value token) { 169 void PushScanner::send(Token::Value token) {
106 int beg = (start_ - buffer_) + real_start_; 170 int beg = (start_ - buffer_) + real_start_;
107 int end = (cursor_ - buffer_) + real_start_; 171 int end = (cursor_ - buffer_) + real_start_;
108 if (FLAG_trace_lexer) { 172 if (FLAG_trace_lexer) {
109 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); 173 printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
110 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s); 174 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s);
111 printf(".\n"); 175 printf(".\n");
112 } 176 }
113 sink_->Record(token, beg, end); 177 sink_->Record(token, beg, end);
114 } 178 }
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
181 if (FLAG_trace_lexer) { 245 if (FLAG_trace_lexer) {
182 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_); 246 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_);
183 } 247 }
184 248
185 /*!re2c 249 /*!re2c
186 re2c:indent:top = 1; 250 re2c:indent:top = 1;
187 re2c:yych:conversion = 0; 251 re2c:yych:conversion = 0;
188 re2c:condenumprefix = kCondition; 252 re2c:condenumprefix = kCondition;
189 re2c:define:YYCONDTYPE = Condition; 253 re2c:define:YYCONDTYPE = Condition;
190 re2c:define:YYCURSOR = cursor_; 254 re2c:define:YYCURSOR = cursor_;
191 re2c:define:YYCTYPE = uint8_t;
192 re2c:define:YYLIMIT = limit_; 255 re2c:define:YYLIMIT = limit_;
193 re2c:define:YYMARKER = marker_; 256 re2c:define:YYMARKER = marker_;
194 257
195 eof = "\000"; 258 eof = "\000";
196 any = [\000-\377]; 259 any = [\000-\377];
197 whitespace_char = [ \t\v\f\r]; 260 whitespace_char = [ \t\v\f\r];
198 whitespace = whitespace_char+; 261 whitespace = whitespace_char+;
199 identifier_start_ = [$_a-zA-Z]; 262 identifier_start_ = [$_a-zA-Z];
200 identifier_char = [$_a-zA-Z0-9]; 263 identifier_char = [$_a-zA-Z0-9];
201 not_identifier_char = any\identifier_char\[\\]; 264 not_identifier_char = any\identifier_char\[\\];
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
312 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); } 375 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); }
313 <Normal> "," { PUSH_TOKEN(Token::COMMA); } 376 <Normal> "," { PUSH_TOKEN(Token::COMMA); }
314 377
315 <Normal> line_terminator { PUSH_LINE_TERMINATOR(); } 378 <Normal> line_terminator { PUSH_LINE_TERMINATOR(); }
316 <Normal> whitespace { SKIP(); } 379 <Normal> whitespace { SKIP(); }
317 380
318 <Normal> ["] :=> DoubleQuoteString 381 <Normal> ["] :=> DoubleQuoteString
319 <Normal> ['] :=> SingleQuoteString 382 <Normal> ['] :=> SingleQuoteString
320 383
321 <Normal> identifier_start_ :=> Identifier 384 <Normal> identifier_start_ :=> Identifier
322 <Normal> "\\u0000" :=> IdentifierIllegal 385 <Normal> "\\u" [0-9a-fA-F]{4} { if (ValidIdentifierStart()) { YYSETCONDITION(kConditionIdentifier); goto yy0; } YYSETCONDITION(kConditionIdentifierIllegal); send(Token::ILLEGAL); start_ = cursor_; goto yy0; }
323 <Normal> "\\u" [0-9a-fA-F]{4} :=> Identifier
324 <Normal> "\\" { PUSH_TOKEN(Token::ILLEGAL); } 386 <Normal> "\\" { PUSH_TOKEN(Token::ILLEGAL); }
325 387
326 <Normal> eof { PUSH_EOF_AND_RETURN();} 388 <Normal> eof { PUSH_EOF_AND_RETURN();}
327 <Normal> any { PUSH_TOKEN(Token::ILLEGAL); } 389 <Normal> any { PUSH_TOKEN(Token::ILLEGAL); }
328 390
329 <DoubleQuoteString> "\\\\" { goto yy0; } 391 <DoubleQuoteString> "\\\\" { goto yy0; }
330 <DoubleQuoteString> "\\\"" { goto yy0; } 392 <DoubleQuoteString> "\\\"" { goto yy0; }
331 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);} 393 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);}
332 <DoubleQuoteString> "\\" "\n" "\r"? { goto yy0; } 394 <DoubleQuoteString> "\\" "\n" "\r"? { goto yy0; }
333 <DoubleQuoteString> "\\" "\r" "\n"? { goto yy0; } 395 <DoubleQuoteString> "\\" "\r" "\n"? { goto yy0; }
334 <DoubleQuoteString> "\n" => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); } 396 <DoubleQuoteString> "\n" => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
335 <DoubleQuoteString> "\r" => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); } 397 <DoubleQuoteString> "\r" => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
336 <DoubleQuoteString> eof { TERMINATE_ILLEGAL(); } 398 <DoubleQuoteString> eof { TERMINATE_ILLEGAL(); }
337 <DoubleQuoteString> any { goto yy0; } 399 <DoubleQuoteString> any { goto yy0; }
338 400
339 <SingleQuoteString> "\\\\" { goto yy0; } 401 <SingleQuoteString> "\\\\" { goto yy0; }
340 <SingleQuoteString> "\\'" { goto yy0; } 402 <SingleQuoteString> "\\'" { goto yy0; }
341 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);} 403 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);}
342 <SingleQuoteString> "\\" "\n" "\r"? { goto yy0; } 404 <SingleQuoteString> "\\" "\n" "\r"? { goto yy0; }
343 <SingleQuoteString> "\\" "\r" "\n"? { goto yy0; } 405 <SingleQuoteString> "\\" "\r" "\n"? { goto yy0; }
344 <SingleQuoteString> "\n" => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); } 406 <SingleQuoteString> "\n" => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
345 <SingleQuoteString> "\r" => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); } 407 <SingleQuoteString> "\r" => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
346 <SingleQuoteString> eof { TERMINATE_ILLEGAL(); } 408 <SingleQuoteString> eof { TERMINATE_ILLEGAL(); }
347 <SingleQuoteString> any { goto yy0; } 409 <SingleQuoteString> any { goto yy0; }
348 410
349 <Identifier> identifier_char+ { goto yy0; } 411 <Identifier> identifier_char+ { goto yy0; }
350 <Identifier> "\\u0000" :=> IdentifierIllegal 412 <Identifier> "\\u" [0-9a-fA-F]{4} { if (ValidIdentifierPart()) goto yy0; YYSETCONDITION(kConditionIdentifierIllegal); send(Token::ILLEGAL); }
351 <Identifier> "\\u" [0-9a-fA-F]{4} { goto yy0; }
352 <Identifier> "\\" { PUSH_TOKEN(Token::ILLEGAL); } 413 <Identifier> "\\" { PUSH_TOKEN(Token::ILLEGAL); }
353 <Identifier> any { PUSH_TOKEN_LOOKAHEAD(Token::IDENTIFIER); } 414 <Identifier> any { PUSH_TOKEN_LOOKAHEAD(Token::IDENTIFIER); }
354 415
355 <IdentifierIllegal> identifier_char+ { goto yy0; } 416 <IdentifierIllegal> identifier_char+ { goto yy0; }
356 <IdentifierIllegal> "\\"+ { goto yy0; } 417 <IdentifierIllegal> "\\"+ { goto yy0; }
357 <IdentifierIllegal> any { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); } 418 <IdentifierIllegal> any { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
358 419
359 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} 420 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
360 <SingleLineComment> eof { PUSH_TOKEN(Token::EOS); } 421 <SingleLineComment> eof { PUSH_TOKEN(Token::EOS); }
361 <SingleLineComment> any { goto yy0; } 422 <SingleLineComment> any { goto yy0; }
362 423
363 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} 424 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();}
364 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } 425 <MultiLineComment> eof { TERMINATE_ILLEGAL(); }
365 <MultiLineComment> any { goto yy0; } 426 <MultiLineComment> any { goto yy0; }
366 427
(...skipping 28 matching lines...) Expand all
395 size_t start_offset = start_ - buffer_; 456 size_t start_offset = start_ - buffer_;
396 memmove(buffer_, start_, limit_ - start_); 457 memmove(buffer_, start_, limit_ - start_);
397 marker_ -= start_offset; 458 marker_ -= start_offset;
398 cursor_ -= start_offset; 459 cursor_ -= start_offset;
399 limit_ -= start_offset; 460 limit_ -= start_offset;
400 start_ -= start_offset; 461 start_ -= start_offset;
401 real_start_ += start_offset; 462 real_start_ += start_offset;
402 } 463 }
403 return 0; 464 return 0;
404 } 465 }
OLDNEW
« no previous file with comments | « src/lexer/lexer.h ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698