Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(411)

Side by Side Diff: src/lexer/lexer.re

Issue 28643002: Experimental parser: Small fixes. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: . Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/lexer/lexer-shell.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Portions of this code based on re2c: 1 // Portions of this code based on re2c:
2 // (re2c/examples/push.re) 2 // (re2c/examples/push.re)
3 // Copyright 2013 the V8 project authors. All rights reserved. 3 // Copyright 2013 the V8 project authors. All rights reserved.
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are 5 // modification, are permitted provided that the following conditions are
6 // met: 6 // met:
7 // 7 //
8 // * Redistributions of source code must retain the above copyright 8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer. 9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above 10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following 11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided 12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution. 13 // with the distribution.
14 // * Neither the name of Google Inc. nor the names of its 14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived 15 // contributors may be used to endorse or promote products derived
16 // from this software without specific prior written permission. 16 // from this software without specific prior written permission.
17 // 17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 29
30 #include <fcntl.h>
31 #include <stdio.h> 30 #include <stdio.h>
32 #include <stddef.h>
33 #include <stdlib.h> 31 #include <stdlib.h>
34 #include <string.h> 32 #include <string.h>
35 33
36 // TODO: 34 // TODO:
37 // - SpiderMonkey compatibility hack: " --> something" is treated 35 // - SpiderMonkey compatibility hack: " --> something" is treated
38 // as a single line comment. 36 // as a single line comment.
39 // - An identifier cannot start immediately after a number.
40 // - Run-time lexing modifications: harmony number literals, keywords depending 37 // - Run-time lexing modifications: harmony number literals, keywords depending
41 // on harmony_modules, harmony_scoping 38 // on harmony_modules, harmony_scoping
39 // - Escaping the string literals (like the baseline does)
40 // - Error recovery after illegal tokens.
42 41
43 enum Condition { 42 enum Condition {
44 kConditionNormal, 43 kConditionNormal,
45 kConditionDoubleQuoteString, 44 kConditionDoubleQuoteString,
46 kConditionSingleQuoteString, 45 kConditionSingleQuoteString,
47 kConditionIdentifier, 46 kConditionIdentifier,
48 kConditionSingleLineComment, 47 kConditionSingleLineComment,
49 kConditionMultiLineComment, 48 kConditionMultiLineComment,
50 kConditionHtmlComment 49 kConditionHtmlComment
51 }; 50 };
(...skipping 20 matching lines...) Expand all
72 #endif // defined(WIN32) 71 #endif // defined(WIN32)
73 72
74 #include "lexer.h" 73 #include "lexer.h"
75 74
76 using namespace v8::internal; 75 using namespace v8::internal;
77 76
78 #define PUSH_TOKEN(T) { send(T); SKIP(); } 77 #define PUSH_TOKEN(T) { send(T); SKIP(); }
79 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); } 78 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
80 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;} 79 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
81 #define PUSH_LINE_TERMINATOR() { SKIP(); } 80 #define PUSH_LINE_TERMINATOR() { SKIP(); }
82 #define TERMINATE_ILLEGAL() { return 1; } 81 #define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; }
83 82
84 PushScanner::PushScanner(ExperimentalScanner* sink) 83 PushScanner::PushScanner(ExperimentalScanner* sink)
85 : eof_(false), 84 : eof_(false),
86 state_(-1), 85 state_(-1),
87 condition_(kConditionNormal), 86 condition_(kConditionNormal),
88 limit_(NULL), 87 limit_(NULL),
89 start_(NULL), 88 start_(NULL),
90 cursor_(NULL), 89 cursor_(NULL),
91 marker_(NULL), 90 marker_(NULL),
92 real_start_(0), 91 real_start_(0),
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
160 buffer_end_ = needed + buffer_; 159 buffer_end_ = needed + buffer_;
161 160
162 marker_ = marker__offset + buffer_; 161 marker_ = marker__offset + buffer_;
163 cursor_ = cursor__offset + buffer_; 162 cursor_ = cursor__offset + buffer_;
164 start_ = buffer_ + start_offset; 163 start_ = buffer_ + start_offset;
165 limit_ = limit__offset + buffer_; 164 limit_ = limit__offset + buffer_;
166 } 165 }
167 memcpy(limit_, input, input_size); 166 memcpy(limit_, input, input_size);
168 limit_ += input_size; 167 limit_ += input_size;
169 168
170 // The scanner starts here 169 #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
171 #define YYLIMIT limit_ 170 #define YYFILL(n) { goto fill; }
172 #define YYCURSOR cursor_
173 #define YYMARKER marker_
174 #define YYCTYPE uint8_t
175 171
176 #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; } 172 #define YYGETSTATE() state_
177 #define YYFILL(n) { goto fill; } 173 #define YYSETSTATE(x) { state_ = (x); }
178 174
179 #define YYGETSTATE() state_ 175 #define YYGETCONDITION() condition_
180 #define YYSETSTATE(x) { state_ = (x); } 176 #define YYSETCONDITION(x) { condition_ = (x); }
181
182 #define YYGETCONDITION() condition_
183 #define YYSETCONDITION(x) { condition_ = (x); }
184 177
185 start_: 178 start_:
186 if (FLAG_trace_lexer) { 179 if (FLAG_trace_lexer) {
187 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_); 180 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_);
188 } 181 }
189 182
190 /*!re2c 183 /*!re2c
191 re2c:indent:top = 1; 184 re2c:indent:top = 1;
192 re2c:yych:conversion = 0; 185 re2c:yych:conversion = 0;
193 re2c:condenumprefix = kCondition; 186 re2c:condenumprefix = kCondition;
194 re2c:define:YYCONDTYPE = Condition; 187 re2c:define:YYCONDTYPE = Condition;
188 re2c:define:YYCURSOR = cursor_;
189 re2c:define:YYCTYPE = uint8_t;
190 re2c:define:YYLIMIT = limit_;
191 re2c:define:YYMARKER = marker_;
195 192
196 eof = "\000"; 193 eof = "\000";
197 any = [\000-\377]; 194 any = [\000-\377];
198 whitespace_char = [ \t\v\f\r]; 195 whitespace_char = [ \t\v\f\r];
199 whitespace = whitespace_char+; 196 whitespace = whitespace_char+;
200 identifier_start_ = [$_\\a-zA-Z]; 197 identifier_start_ = [$_\\a-zA-Z];
201 identifier_char = [$_\\a-zA-Z0-9]; 198 identifier_char = [$_\\a-zA-Z0-9];
202 not_identifier_char = any\identifier_char; 199 not_identifier_char = any\identifier_char;
203 line_terminator = [\n\r]+; 200 line_terminator = [\n\r]+;
204 digit = [0-9]; 201 digit = [0-9];
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
274 <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); } 271 <Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); }
275 <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); } 272 <Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); }
276 <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); } 273 <Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); }
277 <Normal> "<=" { PUSH_TOKEN(Token::LTE); } 274 <Normal> "<=" { PUSH_TOKEN(Token::LTE); }
278 <Normal> ">=" { PUSH_TOKEN(Token::GTE); } 275 <Normal> ">=" { PUSH_TOKEN(Token::GTE); }
279 <Normal> "<<" { PUSH_TOKEN(Token::SHL); } 276 <Normal> "<<" { PUSH_TOKEN(Token::SHL); }
280 <Normal> ">>" { PUSH_TOKEN(Token::SAR); } 277 <Normal> ">>" { PUSH_TOKEN(Token::SAR); }
281 <Normal> "<" { PUSH_TOKEN(Token::LT); } 278 <Normal> "<" { PUSH_TOKEN(Token::LT); }
282 <Normal> ">" { PUSH_TOKEN(Token::GT); } 279 <Normal> ">" { PUSH_TOKEN(Token::GT); }
283 280
284 <Normal> '0x' hex_digit+ { PUSH_TOKEN(Token::NUMBER); } 281 <Normal> '0x' hex_digit+ not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
285 <Normal> "." digit+ maybe_exponent { PUSH_TOKEN(Token::NUMBER); } 282 <Normal> "." digit+ maybe_exponent not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
286 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_TOKEN(Token::NUMBER); } 283 <Normal> digit+ ("." digit+)? maybe_exponent not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
287 284
288 <Normal> "(" { PUSH_TOKEN(Token::LPAREN); } 285 <Normal> "(" { PUSH_TOKEN(Token::LPAREN); }
289 <Normal> ")" { PUSH_TOKEN(Token::RPAREN); } 286 <Normal> ")" { PUSH_TOKEN(Token::RPAREN); }
290 <Normal> "[" { PUSH_TOKEN(Token::LBRACK); } 287 <Normal> "[" { PUSH_TOKEN(Token::LBRACK); }
291 <Normal> "]" { PUSH_TOKEN(Token::RBRACK); } 288 <Normal> "]" { PUSH_TOKEN(Token::RBRACK); }
292 <Normal> "{" { PUSH_TOKEN(Token::LBRACE); } 289 <Normal> "{" { PUSH_TOKEN(Token::LBRACE); }
293 <Normal> "}" { PUSH_TOKEN(Token::RBRACE); } 290 <Normal> "}" { PUSH_TOKEN(Token::RBRACE); }
294 <Normal> ":" { PUSH_TOKEN(Token::COLON); } 291 <Normal> ":" { PUSH_TOKEN(Token::COLON); }
295 <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); } 292 <Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); }
296 <Normal> "." { PUSH_TOKEN(Token::PERIOD); } 293 <Normal> "." { PUSH_TOKEN(Token::PERIOD); }
297 <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); } 294 <Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); }
298 <Normal> "++" { PUSH_TOKEN(Token::INC); } 295 <Normal> "++" { PUSH_TOKEN(Token::INC); }
299 <Normal> "--" { PUSH_TOKEN(Token::DEC); } 296 <Normal> "--" { PUSH_TOKEN(Token::DEC); }
300 297
301 <Normal> "||" { PUSH_TOKEN(Token::OR); } 298 <Normal> "||" { PUSH_TOKEN(Token::OR); }
302 <Normal> "&&" { PUSH_TOKEN(Token::AND); } 299 <Normal> "&&" { PUSH_TOKEN(Token::AND); }
303 300
304 <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); } 301 <Normal> "|" { PUSH_TOKEN(Token::BIT_OR); }
305 <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); } 302 <Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); }
306 <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); } 303 <Normal> "&" { PUSH_TOKEN(Token::BIT_AND); }
307 <Normal> "+" { PUSH_TOKEN(Token::ADD); } 304 <Normal> "+" { PUSH_TOKEN(Token::ADD); }
308 <Normal> "-" { PUSH_TOKEN(Token::SUB); } 305 <Normal> "-" { PUSH_TOKEN(Token::SUB); }
309 <Normal> "*" { PUSH_TOKEN(Token::MUL); } 306 <Normal> "*" { PUSH_TOKEN(Token::MUL); }
310 <Normal> "/" { PUSH_TOKEN(Token::DIV); } 307 <Normal> "/" { PUSH_TOKEN(Token::DIV); }
311 <Normal> "%" { PUSH_TOKEN(Token::MOD); } 308 <Normal> "%" { PUSH_TOKEN(Token::MOD); }
312 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); } 309 <Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); }
313 <Normal> "," { PUSH_TOKEN(Token::COMMA); } 310 <Normal> "," { PUSH_TOKEN(Token::COMMA); }
314 311
315 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); } 312 <Normal> line_terminator { PUSH_LINE_TERMINATOR(); }
316 <Normal> whitespace { SKIP(); } 313 <Normal> whitespace { SKIP(); }
317 314
318 <Normal> ["] :=> DoubleQuoteString 315 <Normal> ["] :=> DoubleQuoteString
319 <Normal> ['] :=> SingleQuoteString 316 <Normal> ['] :=> SingleQuoteString
320 317
321 <Normal> identifier_start_ :=> Identifier 318 <Normal> identifier_start_ :=> Identifier
322 319
323 <Normal> eof { PUSH_EOF_AND_RETURN();} 320 <Normal> eof { PUSH_EOF_AND_RETURN();}
324 <Normal> any { TERMINATE_ILLEGAL(); } 321 <Normal> any { TERMINATE_ILLEGAL(); }
325 322
326 <DoubleQuoteString> "\\\"" { goto yy0; } 323 <DoubleQuoteString> "\\\"" { goto yy0; }
327 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);} 324 <DoubleQuoteString> '"' { PUSH_TOKEN(Token::STRING);}
325 <DoubleQuoteString> line_terminator { TERMINATE_ILLEGAL(); }
326 <DoubleQuoteString> eof { TERMINATE_ILLEGAL(); }
328 <DoubleQuoteString> any { goto yy0; } 327 <DoubleQuoteString> any { goto yy0; }
329 328
330 <SingleQuoteString> "\\'" { goto yy0; } 329 <SingleQuoteString> "\\'" { goto yy0; }
331 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);} 330 <SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING);}
331 <SingleQuoteString> line_terminator { TERMINATE_ILLEGAL(); }
332 <SingleQuoteString> eof { TERMINATE_ILLEGAL(); }
332 <SingleQuoteString> any { goto yy0; } 333 <SingleQuoteString> any { goto yy0; }
333 334
334 <Identifier> identifier_char+ { goto yy0; } 335 <Identifier> identifier_char+ { goto yy0; }
335 <Identifier> any { PUSH_TOKEN_LOOKAHEAD(Token::IDENTIFIER); } 336 <Identifier> any { PUSH_TOKEN_LOOKAHEAD(Token::IDENTIFIER); }
336 337
337 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} 338 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
338 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();} 339 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();}
339 <SingleLineComment> any { goto yy0; } 340 <SingleLineComment> any { goto yy0; }
340 341
341 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} 342 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();}
342 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } 343 <MultiLineComment> eof { TERMINATE_ILLEGAL(); }
343 <MultiLineComment> any { goto yy0; } 344 <MultiLineComment> any { goto yy0; }
344 345
345 <HtmlComment> eof { TERMINATE_ILLEGAL(); } 346 <HtmlComment> eof { TERMINATE_ILLEGAL(); }
346 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} 347 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();}
347 <HtmlComment> any { goto yy0; } 348 <HtmlComment> any { goto yy0; }
348 */ 349 */
349 350
350 fill: 351 fill:
351 int unfinished_size = cursor_ - start_; 352 int unfinished_size = cursor_ - start_;
352 if (FLAG_trace_lexer) { 353 if (FLAG_trace_lexer) {
353 printf( 354 printf(
354 "scanner needs a refill. Exiting for now with:\n" 355 "scanner needs a refill. Exiting for now with:\n"
355 " saved fill state_ = %d\n" 356 " saved fill state_ = %d\n"
356 " unfinished token size = %d\n", 357 " unfinished token size = %d\n",
357 state_, 358 state_,
358 unfinished_size); 359 unfinished_size);
359 if(0 < unfinished_size && start_ < limit_) { 360 if (0 < unfinished_size && start_ < limit_) {
360 printf(" unfinished token is: "); 361 printf(" unfinished token is: ");
361 fwrite(start_, 1, cursor_ - start_, stdout); 362 fwrite(start_, 1, cursor_ - start_, stdout);
362 putchar('\n'); 363 putchar('\n');
363 } 364 }
364 putchar('\n'); 365 putchar('\n');
365 } 366 }
366 367
367 if (eof_) goto start_; 368 if (eof_) goto start_;
368 369
369 // Once we get here, we can get rid of 370 // Once we get here, we can get rid of
370 // everything before start_ and after limit_. 371 // everything before start_ and after limit_.
371 372
372 if (buffer_ < start_) { 373 if (buffer_ < start_) {
373 size_t start_offset = start_ - buffer_; 374 size_t start_offset = start_ - buffer_;
374 memmove(buffer_, start_, limit_ - start_); 375 memmove(buffer_, start_, limit_ - start_);
375 marker_ -= start_offset; 376 marker_ -= start_offset;
376 cursor_ -= start_offset; 377 cursor_ -= start_offset;
377 limit_ -= start_offset; 378 limit_ -= start_offset;
378 start_ -= start_offset; 379 start_ -= start_offset;
379 real_start_ += start_offset; 380 real_start_ += start_offset;
380 } 381 }
381 return 0; 382 return 0;
382 } 383 }
OLDNEW
« no previous file with comments | « no previous file | src/lexer/lexer-shell.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698