Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(249)

Side by Side Diff: src/lexer/lexer.re

Issue 27705002: Experimental parser: Refactoring and timing. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Code review (ulan) Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/lexer/lexer.h ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Portions of this code based on re2c:
2 // (re2c/examples/push.re)
3 // Copyright 2013 the V8 project authors. All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived
16 // from this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
1 #include <fcntl.h> 30 #include <fcntl.h>
2 #include <stdio.h> 31 #include <stdio.h>
3 #include <stddef.h> 32 #include <stddef.h>
4 #include <stdlib.h> 33 #include <stdlib.h>
5 #include <string.h> 34 #include <string.h>
6 35
7 // TODO: 36 // TODO:
8 // - SpiderMonkey compatibility hack: " --> something" is treated 37 // - SpiderMonkey compatibility hack: " --> something" is treated
9 // as a single line comment. 38 // as a single line comment.
10 // - An identifier cannot start immediately after a number. 39 // - An identifier cannot start immediately after a number.
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 #include "lexer.h" 74 #include "lexer.h"
46 75
47 using namespace v8::internal; 76 using namespace v8::internal;
48 77
49 #define PUSH_TOKEN(T) { send(T); SKIP(); } 78 #define PUSH_TOKEN(T) { send(T); SKIP(); }
50 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); } 79 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
51 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;} 80 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
52 #define PUSH_LINE_TERMINATOR() { SKIP(); } 81 #define PUSH_LINE_TERMINATOR() { SKIP(); }
53 #define TERMINATE_ILLEGAL() { return 1; } 82 #define TERMINATE_ILLEGAL() { return 1; }
54 83
55 class PushScanner { 84 PushScanner::PushScanner(ExperimentalScanner* sink)
85 : eof_(false),
86 state_(-1),
87 condition_(kConditionNormal),
88 limit_(NULL),
89 start_(NULL),
90 cursor_(NULL),
91 marker_(NULL),
92 real_start_(0),
93 buffer_(NULL),
94 buffer_end_(NULL),
95 yych(0),
96 yyaccept(0),
97 sink_(sink) {
56 98
57 public: 99 }
58 PushScanner(ExperimentalScanner* sink): 100
59 eof_(false), 101 PushScanner::~PushScanner() {
60 state_(-1), 102 }
61 condition_(kConditionNormal), 103
62 limit_(NULL), 104 void PushScanner::send(Token::Value token) {
63 start_(NULL), 105 int beg = (start_ - buffer_) + real_start_;
64 cursor_(NULL), 106 int end = (cursor_ - buffer_) + real_start_;
65 marker_(NULL), 107 if (FLAG_trace_lexer) {
66 real_start_(0), 108 printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
67 buffer_(NULL), 109 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s);
68 buffer_end_(NULL), 110 printf(".\n");
69 yych(0),
70 yyaccept(0),
71 sink_(sink) {
72 } 111 }
112 sink_->Record(token, beg, end);
113 }
73 114
74 ~PushScanner() { 115 uint32_t PushScanner::push(const void *input, int input_size) {
75 } 116 if (FLAG_trace_lexer) {
76 117 printf(
77 void send(Token::Value token) {
78 int beg = (start_ - buffer_) + real_start_;
79 int end = (cursor_ - buffer_) + real_start_;
80 if (FLAG_trace_lexer) {
81 printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
82 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s);
83 printf(".\n");
84 }
85 sink_->Record(token, beg, end);
86 }
87
88 uint32_t push(const void *input, int input_size) {
89 if (FLAG_trace_lexer) {
90 printf(
91 "scanner is receiving a new data batch of length %d\n" 118 "scanner is receiving a new data batch of length %d\n"
92 "scanner continues with saved state_ = %d\n", 119 "scanner continues with saved state_ = %d\n",
93 input_size, 120 input_size,
94 state_ 121 state_);
95 ); 122 }
96 }
97 123
98 // Data source is signaling end of file when batch size 124 // Data source is signaling end of file when batch size
99 // is less than max_fill. This is slightly annoying because 125 // is less than max_fill. This is slightly annoying because
100 // max_fill is a value that can only be known after re2c does 126 // max_fill is a value that can only be known after re2c does
101 // its thing. Practically though, max_fill is never bigger than 127 // its thing. Practically though, max_fill is never bigger than
102 // the longest keyword, so given our grammar, 32 is a safe bet. 128 // the longest keyword, so given our grammar, 32 is a safe bet.
103 129
104 uint8_t null[64]; 130 uint8_t null[64];
105 const int max_fill = 32; 131 const int max_fill = 32;
106 if (input_size < max_fill) { // FIXME: do something about this!!! 132 if (input_size < max_fill) { // FIXME: do something about this!!!
107 eof_ = true; 133 eof_ = true;
108 input = null; 134 input = null;
109 input_size = sizeof(null); 135 input_size = sizeof(null);
110 memset(null, 0, sizeof(null)); 136 memset(null, 0, sizeof(null));
111 } 137 }
112 138
113 139
114 // When we get here, we have a partially 140 // When we get here, we have a partially
115 // consumed buffer_ which is in the following state_: 141 // consumed buffer_ which is in the following state_:
116 // last valid char last valid buffer_ spot 142 // last valid char last valid buffer_ spot
117 // v v 143 // v v
118 // +-------------------+-------------+---------------+-------------+----------------------+ 144 // +-------------------+-------------+---------------+-------------+----------------------+
119 // ^ ^ ^ ^ ^ ^ 145 // ^ ^ ^ ^ ^ ^
120 // buffer_ start_ marker_ cursor_ limit_ buffer_end_ 146 // buffer_ start_ marker_ cursor_ limit_ buffer_end_
121 // 147 //
122 // We need to stretch the buffer_ and concatenate the new chunk of input to it 148 // We need to stretch the buffer_ and concatenate the new chunk of input to it
123 149
124 size_t used = limit_ - buffer_; 150 size_t used = limit_ - buffer_;
125 size_t needed = used + input_size; 151 size_t needed = used + input_size;
126 size_t allocated = buffer_end_ - buffer_; 152 size_t allocated = buffer_end_ - buffer_;
127 if(allocated < needed) { 153 if (allocated < needed) {
128 size_t limit__offset = limit_ - buffer_; 154 size_t limit__offset = limit_ - buffer_;
129 size_t start_offset = start_ - buffer_; 155 size_t start_offset = start_ - buffer_;
130 size_t marker__offset = marker_ - buffer_; 156 size_t marker__offset = marker_ - buffer_;
131 size_t cursor__offset = cursor_ - buffer_; 157 size_t cursor__offset = cursor_ - buffer_;
132 158
133 buffer_ = (uint8_t*)realloc(buffer_, needed); 159 buffer_ = (uint8_t*)realloc(buffer_, needed);
134 buffer_end_ = needed + buffer_; 160 buffer_end_ = needed + buffer_;
135 161
136 marker_ = marker__offset + buffer_; 162 marker_ = marker__offset + buffer_;
137 cursor_ = cursor__offset + buffer_; 163 cursor_ = cursor__offset + buffer_;
138 start_ = buffer_ + start_offset; 164 start_ = buffer_ + start_offset;
139 limit_ = limit__offset + buffer_; 165 limit_ = limit__offset + buffer_;
140 } 166 }
141 memcpy(limit_, input, input_size); 167 memcpy(limit_, input, input_size);
142 limit_ += input_size; 168 limit_ += input_size;
143 169
144 // The scanner start_s here 170 // The scanner starts here
145 #define YYLIMIT limit_ 171 #define YYLIMIT limit_
146 #define YYCURSOR cursor_ 172 #define YYCURSOR cursor_
147 #define YYMARKER marker_ 173 #define YYMARKER marker_
148 #define YYCTYPE uint8_t 174 #define YYCTYPE uint8_t
149 175
150 #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; } 176 #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
151 #define YYFILL(n) { goto fill; } 177 #define YYFILL(n) { goto fill; }
152 178
153 #define YYGETSTATE() state_ 179 #define YYGETSTATE() state_
154 #define YYSETSTATE(x) { state_ = (x); } 180 #define YYSETSTATE(x) { state_ = (x); }
155 181
156 #define YYGETCONDITION() condition_ 182 #define YYGETCONDITION() condition_
157 #define YYSETCONDITION(x) { condition_ = (x); } 183 #define YYSETCONDITION(x) { condition_ = (x); }
158 184
159 start_: 185 start_:
160 if (FLAG_trace_lexer) { 186 if (FLAG_trace_lexer) {
161 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_); 187 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_);
162 } 188 }
163 189
164 /*!re2c 190 /*!re2c
165 re2c:indent:top = 1; 191 re2c:indent:top = 1;
166 re2c:yych:conversion = 0; 192 re2c:yych:conversion = 0;
167 re2c:condenumprefix = kCondition; 193 re2c:condenumprefix = kCondition;
168 re2c:define:YYCONDTYPE = Condition; 194 re2c:define:YYCONDTYPE = Condition;
169 195
170 eof = "\000"; 196 eof = "\000";
171 any = [\000-\377]; 197 any = [\000-\377];
172 whitespace_char = [ \t\v\f\r]; 198 whitespace_char = [ \t\v\f\r];
173 whitespace = whitespace_char+; 199 whitespace = whitespace_char+;
174 identifier_start_ = [$_\\a-zA-Z]; 200 identifier_start_ = [$_\\a-zA-Z];
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
314 340
315 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} 341 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();}
316 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } 342 <MultiLineComment> eof { TERMINATE_ILLEGAL(); }
317 <MultiLineComment> any { goto yy0; } 343 <MultiLineComment> any { goto yy0; }
318 344
319 <HtmlComment> eof { TERMINATE_ILLEGAL(); } 345 <HtmlComment> eof { TERMINATE_ILLEGAL(); }
320 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} 346 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();}
321 <HtmlComment> any { goto yy0; } 347 <HtmlComment> any { goto yy0; }
322 */ 348 */
323 349
324 fill: 350 fill:
325 int unfinished_size = cursor_ - start_; 351 int unfinished_size = cursor_ - start_;
326 if (FLAG_trace_lexer) { 352 if (FLAG_trace_lexer) {
327 printf( 353 printf(
328 "scanner needs a refill. Exiting for now with:\n" 354 "scanner needs a refill. Exiting for now with:\n"
329 " saved fill state_ = %d\n" 355 " saved fill state_ = %d\n"
330 " unfinished token size = %d\n", 356 " unfinished token size = %d\n",
331 state_, 357 state_,
332 unfinished_size 358 unfinished_size);
333 ); 359 if(0 < unfinished_size && start_ < limit_) {
334 if(0 < unfinished_size && start_ < limit_) { 360 printf(" unfinished token is: ");
335 printf(" unfinished token is: "); 361 fwrite(start_, 1, cursor_ - start_, stdout);
336 fwrite(start_, 1, cursor_ - start_, stdout);
337 putchar('\n');
338 }
339 putchar('\n'); 362 putchar('\n');
340 } 363 }
341 364 putchar('\n');
342 if (eof_) goto start_;
343
344 // Once we get here, we can get rid of
345 // everything before start_ and after limit_.
346
347 if (buffer_ < start_) {
348 size_t start_offset = start_ - buffer_;
349 memmove(buffer_, start_, limit_ - start_);
350 marker_ -= start_offset;
351 cursor_ -= start_offset;
352 limit_ -= start_offset;
353 start_ -= start_offset;
354 real_start_ += start_offset;
355 }
356 return 0;
357 } 365 }
358 366
359 private: 367 if (eof_) goto start_;
360 bool eof_;
361 int32_t state_;
362 int32_t condition_;
363 368
364 uint8_t* limit_; 369 // Once we get here, we can get rid of
365 uint8_t* start_; 370 // everything before start_ and after limit_.
366 uint8_t* cursor_;
367 uint8_t* marker_;
368 int real_start_;
369 371
370 uint8_t* buffer_; 372 if (buffer_ < start_) {
371 uint8_t* buffer_end_; 373 size_t start_offset = start_ - buffer_;
372 374 memmove(buffer_, start_, limit_ - start_);
373 uint8_t yych; 375 marker_ -= start_offset;
374 uint32_t yyaccept; 376 cursor_ -= start_offset;
375 377 limit_ -= start_offset;
376 ExperimentalScanner* sink_; 378 start_ -= start_offset;
377 }; 379 real_start_ += start_offset;
378 380 }
379 381 return 0;
380 ExperimentalScanner::ExperimentalScanner(const char* fname) :
381 current_(0), fetched_(0) {
382 file_ = fopen(fname, "rb");
383 scanner_ = new PushScanner(this);
384 } 382 }
385
386
387 ExperimentalScanner::~ExperimentalScanner() {
388 fclose(file_);
389 }
390
391
392 void ExperimentalScanner::FillTokens() {
393 current_ = 0;
394 fetched_ = 0;
395 uint8_t chars[BUFFER_SIZE];
396 int n = static_cast<int>(fread(&chars, 1, BUFFER_SIZE, file_));
397 for (int i = n; i < BUFFER_SIZE; i++) chars[i] = 0;
398 scanner_->push(chars, BUFFER_SIZE);
399 }
400
401
402 Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
403 while (current_ == fetched_) {
404 FillTokens();
405 }
406 *beg_pos = beg_[current_];
407 *end_pos = end_[current_];
408 Token::Value res = token_[current_];
409 if (token_[current_] != Token::Token::EOS &&
410 token_[current_] != Token::ILLEGAL) {
411 current_++;
412 }
413 return res;
414 }
415
416
417 void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
418 if (token == Token::EOS) end--;
419 token_[fetched_] = token;
420 beg_[fetched_] = beg;
421 end_[fetched_] = end;
422 fetched_++;
423 }
OLDNEW
« no previous file with comments | « src/lexer/lexer.h ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698