OLD | NEW |
| 1 // Portions of this code based on re2c: |
| 2 // (re2c/examples/push.re) |
| 3 // Copyright 2013 the V8 project authors. All rights reserved. |
| 4 // Redistribution and use in source and binary forms, with or without |
| 5 // modification, are permitted provided that the following conditions are |
| 6 // met: |
| 7 // |
| 8 // * Redistributions of source code must retain the above copyright |
| 9 // notice, this list of conditions and the following disclaimer. |
| 10 // * Redistributions in binary form must reproduce the above |
| 11 // copyright notice, this list of conditions and the following |
| 12 // disclaimer in the documentation and/or other materials provided |
| 13 // with the distribution. |
| 14 // * Neither the name of Google Inc. nor the names of its |
| 15 // contributors may be used to endorse or promote products derived |
| 16 // from this software without specific prior written permission. |
| 17 // |
| 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 |
1 #include <fcntl.h> | 30 #include <fcntl.h> |
2 #include <stdio.h> | 31 #include <stdio.h> |
3 #include <stddef.h> | 32 #include <stddef.h> |
4 #include <stdlib.h> | 33 #include <stdlib.h> |
5 #include <string.h> | 34 #include <string.h> |
6 | 35 |
7 // TODO: | 36 // TODO: |
8 // - SpiderMonkey compatibility hack: " --> something" is treated | 37 // - SpiderMonkey compatibility hack: " --> something" is treated |
9 // as a single line comment. | 38 // as a single line comment. |
10 // - An identifier cannot start immediately after a number. | 39 // - An identifier cannot start immediately after a number. |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
45 #include "lexer.h" | 74 #include "lexer.h" |
46 | 75 |
47 using namespace v8::internal; | 76 using namespace v8::internal; |
48 | 77 |
49 #define PUSH_TOKEN(T) { send(T); SKIP(); } | 78 #define PUSH_TOKEN(T) { send(T); SKIP(); } |
50 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); } | 79 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); } |
51 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;} | 80 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;} |
52 #define PUSH_LINE_TERMINATOR() { SKIP(); } | 81 #define PUSH_LINE_TERMINATOR() { SKIP(); } |
53 #define TERMINATE_ILLEGAL() { return 1; } | 82 #define TERMINATE_ILLEGAL() { return 1; } |
54 | 83 |
55 class PushScanner { | 84 PushScanner::PushScanner(ExperimentalScanner* sink) |
| 85 : eof_(false), |
| 86 state_(-1), |
| 87 condition_(kConditionNormal), |
| 88 limit_(NULL), |
| 89 start_(NULL), |
| 90 cursor_(NULL), |
| 91 marker_(NULL), |
| 92 real_start_(0), |
| 93 buffer_(NULL), |
| 94 buffer_end_(NULL), |
| 95 yych(0), |
| 96 yyaccept(0), |
| 97 sink_(sink) { |
56 | 98 |
57 public: | 99 } |
58 PushScanner(ExperimentalScanner* sink): | 100 |
59 eof_(false), | 101 PushScanner::~PushScanner() { |
60 state_(-1), | 102 } |
61 condition_(kConditionNormal), | 103 |
62 limit_(NULL), | 104 void PushScanner::send(Token::Value token) { |
63 start_(NULL), | 105 int beg = (start_ - buffer_) + real_start_; |
64 cursor_(NULL), | 106 int end = (cursor_ - buffer_) + real_start_; |
65 marker_(NULL), | 107 if (FLAG_trace_lexer) { |
66 real_start_(0), | 108 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); |
67 buffer_(NULL), | 109 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s); |
68 buffer_end_(NULL), | 110 printf(".\n"); |
69 yych(0), | |
70 yyaccept(0), | |
71 sink_(sink) { | |
72 } | 111 } |
| 112 sink_->Record(token, beg, end); |
| 113 } |
73 | 114 |
74 ~PushScanner() { | 115 uint32_t PushScanner::push(const void *input, int input_size) { |
75 } | 116 if (FLAG_trace_lexer) { |
76 | 117 printf( |
77 void send(Token::Value token) { | |
78 int beg = (start_ - buffer_) + real_start_; | |
79 int end = (cursor_ - buffer_) + real_start_; | |
80 if (FLAG_trace_lexer) { | |
81 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); | |
82 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s); | |
83 printf(".\n"); | |
84 } | |
85 sink_->Record(token, beg, end); | |
86 } | |
87 | |
88 uint32_t push(const void *input, int input_size) { | |
89 if (FLAG_trace_lexer) { | |
90 printf( | |
91 "scanner is receiving a new data batch of length %d\n" | 118 "scanner is receiving a new data batch of length %d\n" |
92 "scanner continues with saved state_ = %d\n", | 119 "scanner continues with saved state_ = %d\n", |
93 input_size, | 120 input_size, |
94 state_ | 121 state_); |
95 ); | 122 } |
96 } | |
97 | 123 |
98 // Data source is signaling end of file when batch size | 124 // Data source is signaling end of file when batch size |
99 // is less than max_fill. This is slightly annoying because | 125 // is less than max_fill. This is slightly annoying because |
100 // max_fill is a value that can only be known after re2c does | 126 // max_fill is a value that can only be known after re2c does |
101 // its thing. Practically though, max_fill is never bigger than | 127 // its thing. Practically though, max_fill is never bigger than |
102 // the longest keyword, so given our grammar, 32 is a safe bet. | 128 // the longest keyword, so given our grammar, 32 is a safe bet. |
103 | 129 |
104 uint8_t null[64]; | 130 uint8_t null[64]; |
105 const int max_fill = 32; | 131 const int max_fill = 32; |
106 if (input_size < max_fill) { // FIXME: do something about this!!! | 132 if (input_size < max_fill) { // FIXME: do something about this!!! |
107 eof_ = true; | 133 eof_ = true; |
108 input = null; | 134 input = null; |
109 input_size = sizeof(null); | 135 input_size = sizeof(null); |
110 memset(null, 0, sizeof(null)); | 136 memset(null, 0, sizeof(null)); |
111 } | 137 } |
112 | 138 |
113 | 139 |
114 // When we get here, we have a partially | 140 // When we get here, we have a partially |
115 // consumed buffer_ which is in the following state_: | 141 // consumed buffer_ which is in the following state_: |
116 // last valid char last valid buffer_ spot | 142 // last valid char last valid buffer_ spot |
117 // v v | 143 // v v |
118 // +-------------------+-------------+---------------+-------------+----------------------+ | 144 // +-------------------+-------------+---------------+-------------+----------------------+ |
119 // ^ ^ ^ ^ ^ ^ | 145 // ^ ^ ^ ^ ^ ^ |
120 // buffer_ start_ marker_ cursor_ limit_ buffer_end_ | 146 // buffer_ start_ marker_ cursor_ limit_ buffer_end_ |
121 // | 147 // |
122 // We need to stretch the buffer_ and concatenate the new chunk of input to it | 148 // We need to stretch the buffer_ and concatenate the new chunk of input to it |
123 | 149 |
124 size_t used = limit_ - buffer_; | 150 size_t used = limit_ - buffer_; |
125 size_t needed = used + input_size; | 151 size_t needed = used + input_size; |
126 size_t allocated = buffer_end_ - buffer_; | 152 size_t allocated = buffer_end_ - buffer_; |
127 if(allocated < needed) { | 153 if (allocated < needed) { |
128 size_t limit__offset = limit_ - buffer_; | 154 size_t limit__offset = limit_ - buffer_; |
129 size_t start_offset = start_ - buffer_; | 155 size_t start_offset = start_ - buffer_; |
130 size_t marker__offset = marker_ - buffer_; | 156 size_t marker__offset = marker_ - buffer_; |
131 size_t cursor__offset = cursor_ - buffer_; | 157 size_t cursor__offset = cursor_ - buffer_; |
132 | 158 |
133 buffer_ = (uint8_t*)realloc(buffer_, needed); | 159 buffer_ = (uint8_t*)realloc(buffer_, needed); |
134 buffer_end_ = needed + buffer_; | 160 buffer_end_ = needed + buffer_; |
135 | 161 |
136 marker_ = marker__offset + buffer_; | 162 marker_ = marker__offset + buffer_; |
137 cursor_ = cursor__offset + buffer_; | 163 cursor_ = cursor__offset + buffer_; |
138 start_ = buffer_ + start_offset; | 164 start_ = buffer_ + start_offset; |
139 limit_ = limit__offset + buffer_; | 165 limit_ = limit__offset + buffer_; |
140 } | 166 } |
141 memcpy(limit_, input, input_size); | 167 memcpy(limit_, input, input_size); |
142 limit_ += input_size; | 168 limit_ += input_size; |
143 | 169 |
144 // The scanner start_s here | 170 // The scanner starts here |
145 #define YYLIMIT limit_ | 171 #define YYLIMIT limit_ |
146 #define YYCURSOR cursor_ | 172 #define YYCURSOR cursor_ |
147 #define YYMARKER marker_ | 173 #define YYMARKER marker_ |
148 #define YYCTYPE uint8_t | 174 #define YYCTYPE uint8_t |
149 | 175 |
150 #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); got
o yy0; } | 176 #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy
0; } |
151 #define YYFILL(n) { goto fill; } | 177 #define YYFILL(n) { goto fill; } |
152 | 178 |
153 #define YYGETSTATE() state_ | 179 #define YYGETSTATE() state_ |
154 #define YYSETSTATE(x) { state_ = (x); } | 180 #define YYSETSTATE(x) { state_ = (x); } |
155 | 181 |
156 #define YYGETCONDITION() condition_ | 182 #define YYGETCONDITION() condition_ |
157 #define YYSETCONDITION(x) { condition_ = (x); } | 183 #define YYSETCONDITION(x) { condition_ = (x); } |
158 | 184 |
159 start_: | 185 start_: |
160 if (FLAG_trace_lexer) { | 186 if (FLAG_trace_lexer) { |
161 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition
_); | 187 printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_)
; |
162 } | 188 } |
163 | 189 |
164 /*!re2c | 190 /*!re2c |
165 re2c:indent:top = 1; | 191 re2c:indent:top = 1; |
166 re2c:yych:conversion = 0; | 192 re2c:yych:conversion = 0; |
167 re2c:condenumprefix = kCondition; | 193 re2c:condenumprefix = kCondition; |
168 re2c:define:YYCONDTYPE = Condition; | 194 re2c:define:YYCONDTYPE = Condition; |
169 | 195 |
170 eof = "\000"; | 196 eof = "\000"; |
171 any = [\000-\377]; | 197 any = [\000-\377]; |
172 whitespace_char = [ \t\v\f\r]; | 198 whitespace_char = [ \t\v\f\r]; |
173 whitespace = whitespace_char+; | 199 whitespace = whitespace_char+; |
174 identifier_start_ = [$_\\a-zA-Z]; | 200 identifier_start_ = [$_\\a-zA-Z]; |
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
314 | 340 |
315 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} | 341 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} |
316 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } | 342 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } |
317 <MultiLineComment> any { goto yy0; } | 343 <MultiLineComment> any { goto yy0; } |
318 | 344 |
319 <HtmlComment> eof { TERMINATE_ILLEGAL(); } | 345 <HtmlComment> eof { TERMINATE_ILLEGAL(); } |
320 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} | 346 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} |
321 <HtmlComment> any { goto yy0; } | 347 <HtmlComment> any { goto yy0; } |
322 */ | 348 */ |
323 | 349 |
324 fill: | 350 fill: |
325 int unfinished_size = cursor_ - start_; | 351 int unfinished_size = cursor_ - start_; |
326 if (FLAG_trace_lexer) { | 352 if (FLAG_trace_lexer) { |
327 printf( | 353 printf( |
328 "scanner needs a refill. Exiting for now with:\n" | 354 "scanner needs a refill. Exiting for now with:\n" |
329 " saved fill state_ = %d\n" | 355 " saved fill state_ = %d\n" |
330 " unfinished token size = %d\n", | 356 " unfinished token size = %d\n", |
331 state_, | 357 state_, |
332 unfinished_size | 358 unfinished_size); |
333 ); | 359 if(0 < unfinished_size && start_ < limit_) { |
334 if(0 < unfinished_size && start_ < limit_) { | 360 printf(" unfinished token is: "); |
335 printf(" unfinished token is: "); | 361 fwrite(start_, 1, cursor_ - start_, stdout); |
336 fwrite(start_, 1, cursor_ - start_, stdout); | |
337 putchar('\n'); | |
338 } | |
339 putchar('\n'); | 362 putchar('\n'); |
340 } | 363 } |
341 | 364 putchar('\n'); |
342 if (eof_) goto start_; | |
343 | |
344 // Once we get here, we can get rid of | |
345 // everything before start_ and after limit_. | |
346 | |
347 if (buffer_ < start_) { | |
348 size_t start_offset = start_ - buffer_; | |
349 memmove(buffer_, start_, limit_ - start_); | |
350 marker_ -= start_offset; | |
351 cursor_ -= start_offset; | |
352 limit_ -= start_offset; | |
353 start_ -= start_offset; | |
354 real_start_ += start_offset; | |
355 } | |
356 return 0; | |
357 } | 365 } |
358 | 366 |
359 private: | 367 if (eof_) goto start_; |
360 bool eof_; | |
361 int32_t state_; | |
362 int32_t condition_; | |
363 | 368 |
364 uint8_t* limit_; | 369 // Once we get here, we can get rid of |
365 uint8_t* start_; | 370 // everything before start_ and after limit_. |
366 uint8_t* cursor_; | |
367 uint8_t* marker_; | |
368 int real_start_; | |
369 | 371 |
370 uint8_t* buffer_; | 372 if (buffer_ < start_) { |
371 uint8_t* buffer_end_; | 373 size_t start_offset = start_ - buffer_; |
372 | 374 memmove(buffer_, start_, limit_ - start_); |
373 uint8_t yych; | 375 marker_ -= start_offset; |
374 uint32_t yyaccept; | 376 cursor_ -= start_offset; |
375 | 377 limit_ -= start_offset; |
376 ExperimentalScanner* sink_; | 378 start_ -= start_offset; |
377 }; | 379 real_start_ += start_offset; |
378 | 380 } |
379 | 381 return 0; |
380 ExperimentalScanner::ExperimentalScanner(const char* fname) : | |
381 current_(0), fetched_(0) { | |
382 file_ = fopen(fname, "rb"); | |
383 scanner_ = new PushScanner(this); | |
384 } | 382 } |
385 | |
386 | |
387 ExperimentalScanner::~ExperimentalScanner() { | |
388 fclose(file_); | |
389 } | |
390 | |
391 | |
392 void ExperimentalScanner::FillTokens() { | |
393 current_ = 0; | |
394 fetched_ = 0; | |
395 uint8_t chars[BUFFER_SIZE]; | |
396 int n = static_cast<int>(fread(&chars, 1, BUFFER_SIZE, file_)); | |
397 for (int i = n; i < BUFFER_SIZE; i++) chars[i] = 0; | |
398 scanner_->push(chars, BUFFER_SIZE); | |
399 } | |
400 | |
401 | |
402 Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) { | |
403 while (current_ == fetched_) { | |
404 FillTokens(); | |
405 } | |
406 *beg_pos = beg_[current_]; | |
407 *end_pos = end_[current_]; | |
408 Token::Value res = token_[current_]; | |
409 if (token_[current_] != Token::Token::EOS && | |
410 token_[current_] != Token::ILLEGAL) { | |
411 current_++; | |
412 } | |
413 return res; | |
414 } | |
415 | |
416 | |
417 void ExperimentalScanner::Record(Token::Value token, int beg, int end) { | |
418 if (token == Token::EOS) end--; | |
419 token_[fetched_] = token; | |
420 beg_[fetched_] = beg; | |
421 end_[fetched_] = end; | |
422 fetched_++; | |
423 } | |
OLD | NEW |