| OLD | NEW |
| 1 // Portions of this code based on re2c: | 1 // Portions of this code based on re2c: |
| 2 // (re2c/examples/push.re) | 2 // (re2c/examples/push.re) |
| 3 // Copyright 2013 the V8 project authors. All rights reserved. | 3 // Copyright 2013 the V8 project authors. All rights reserved. |
| 4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
| 5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
| 6 // met: | 6 // met: |
| 7 // | 7 // |
| 8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
| 9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
| 10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
| 11 // copyright notice, this list of conditions and the following | 11 // copyright notice, this list of conditions and the following |
| 12 // disclaimer in the documentation and/or other materials provided | 12 // disclaimer in the documentation and/or other materials provided |
| 13 // with the distribution. | 13 // with the distribution. |
| 14 // * Neither the name of Google Inc. nor the names of its | 14 // * Neither the name of Google Inc. nor the names of its |
| 15 // contributors may be used to endorse or promote products derived | 15 // contributors may be used to endorse or promote products derived |
| 16 // from this software without specific prior written permission. | 16 // from this software without specific prior written permission. |
| 17 // | 17 // |
| 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | 29 |
| 30 #include "lexer.h" |
| 31 |
| 30 #include <stdio.h> | 32 #include <stdio.h> |
| 31 #include <stdlib.h> | 33 #include <stdlib.h> |
| 32 #include <string.h> | 34 #include <string.h> |
| 33 | 35 |
| 34 // FIXME: some of this is probably not needed. | 36 // FIXME: some of this is probably not needed. |
| 35 #include "allocation.h" | 37 #include "allocation.h" |
| 36 #include "ast.h" | 38 #include "ast.h" |
| 37 #include "preparse-data-format.h" | 39 #include "preparse-data-format.h" |
| 38 #include "preparse-data.h" | 40 #include "preparse-data.h" |
| 39 #include "scopes.h" | 41 #include "scopes.h" |
| 40 #include "preparser.h" | 42 #include "preparser.h" |
| 41 #include "api.h" | 43 #include "api.h" |
| 42 #include "ast.h" | 44 #include "ast.h" |
| 43 #include "bootstrapper.h" | 45 #include "bootstrapper.h" |
| 44 #include "char-predicates-inl.h" | 46 #include "char-predicates-inl.h" |
| 45 #include "codegen.h" | 47 #include "codegen.h" |
| 46 #include "compiler.h" | 48 #include "compiler.h" |
| 47 #include "func-name-inferrer.h" | 49 #include "func-name-inferrer.h" |
| 48 #include "messages.h" | 50 #include "messages.h" |
| 49 #include "parser.h" | 51 #include "parser.h" |
| 50 #include "platform.h" | 52 #include "platform.h" |
| 51 #include "preparser.h" | 53 #include "preparser.h" |
| 52 #include "runtime.h" | 54 #include "runtime.h" |
| 53 #include "scanner-character-streams.h" | 55 #include "scanner-character-streams.h" |
| 54 #include "scopeinfo.h" | 56 #include "scopeinfo.h" |
| 55 #include "string-stream.h" | 57 #include "string-stream.h" |
| 56 | 58 |
| 59 #include "experimental-scanner.h" |
| 57 | 60 |
| 58 // TODO: | 61 // TODO: |
| 59 // - Run-time lexing modifications: harmony number literals, keywords depending | 62 // - Run-time lexing modifications: harmony number literals, keywords depending |
| 60 // on harmony_modules, harmony_scoping | 63 // on harmony_modules, harmony_scoping |
| 61 // - Escaping the string literals (like the baseline does) | 64 // - Escaping the string literals (like the baseline does) |
| 62 // - Error recovery after illegal tokens. | 65 // - Error recovery after illegal tokens. |
| 63 | 66 |
| 64 enum Condition { | 67 enum Condition { |
| 65 kConditionNormal, | 68 kConditionNormal, |
| 66 kConditionDoubleQuoteString, | 69 kConditionDoubleQuoteString, |
| 67 kConditionSingleQuoteString, | 70 kConditionSingleQuoteString, |
| 68 kConditionIdentifier, | 71 kConditionIdentifier, |
| 69 kConditionIdentifierIllegal, | 72 kConditionIdentifierIllegal, |
| 70 kConditionSingleLineComment, | 73 kConditionSingleLineComment, |
| 71 kConditionMultiLineComment, | 74 kConditionMultiLineComment, |
| 72 kConditionHtmlComment | 75 kConditionHtmlComment |
| 73 }; | 76 }; |
| 74 | 77 |
| 75 #if defined(WIN32) | |
| 76 | |
| 77 typedef signed char int8_t; | |
| 78 typedef signed short int16_t; | |
| 79 typedef signed int int32_t; | |
| 80 | |
| 81 typedef unsigned char uint8_t; | |
| 82 typedef unsigned short uint16_t; | |
| 83 typedef unsigned int uint32_t; | |
| 84 | |
| 85 #else | |
| 86 | |
| 87 #include <stdint.h> | |
| 88 #include <unistd.h> | |
| 89 | |
| 90 #ifndef O_BINARY | |
| 91 #define O_BINARY 0 | |
| 92 #endif | |
| 93 | |
| 94 #endif // defined(WIN32) | |
| 95 | |
| 96 #include "experimental-scanner.h" | |
| 97 #include "lexer.h" | |
| 98 | |
| 99 using namespace v8::internal; | 78 using namespace v8::internal; |
| 100 | 79 |
| 101 namespace { | 80 namespace { |
| 102 | 81 |
| 103 inline int HexValue(uc32 c) { | 82 inline int HexValue(uc32 c) { |
| 104 c -= '0'; | 83 c -= '0'; |
| 105 if (static_cast<unsigned>(c) <= 9) return c; | 84 if (static_cast<unsigned>(c) <= 9) return c; |
| 106 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. | 85 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. |
| 107 if (static_cast<unsigned>(c) <= 5) return c + 10; | 86 if (static_cast<unsigned>(c) <= 5) return c + 10; |
| 108 return -1; | 87 return -1; |
| 109 } | 88 } |
| 110 | 89 |
| 111 } | 90 } |
| 112 | 91 |
| 113 #define PUSH_TOKEN(T) { send(T); SKIP(); } | 92 #define PUSH_TOKEN(T) { send(T); SKIP(); } |
| 114 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); } | 93 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); } |
| 115 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;} | 94 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;} |
| 116 #define PUSH_LINE_TERMINATOR() { just_seen_line_terminator_ = true; SKIP(); } | 95 #define PUSH_LINE_TERMINATOR() { just_seen_line_terminator_ = true; SKIP(); } |
| 117 #define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; } | 96 #define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; } |
| 118 | 97 |
| 119 #define YYCTYPE uint8_t | |
| 120 | |
| 121 PushScanner::PushScanner(ExperimentalScanner* sink, UnicodeCache* unicode_cache) | 98 PushScanner::PushScanner(ExperimentalScanner* sink, UnicodeCache* unicode_cache) |
| 122 : unicode_cache_(unicode_cache), | 99 : unicode_cache_(unicode_cache), |
| 123 eof_(false), | 100 eof_(false), |
| 124 state_(-1), | 101 state_(-1), |
| 125 condition_(kConditionNormal), | 102 condition_(kConditionNormal), |
| 126 limit_(NULL), | 103 limit_(NULL), |
| 127 start_(NULL), | 104 start_(NULL), |
| 128 cursor_(NULL), | 105 cursor_(NULL), |
| 129 marker_(NULL), | 106 marker_(NULL), |
| 130 real_start_(0), | 107 real_start_(0), |
| (...skipping 32 matching lines...) |
| 163 | 140 |
| 164 bool PushScanner::ValidIdentifierStart() { | 141 bool PushScanner::ValidIdentifierStart() { |
| 165 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4)); | 142 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4)); |
| 166 } | 143 } |
| 167 | 144 |
| 168 void PushScanner::send(Token::Value token) { | 145 void PushScanner::send(Token::Value token) { |
| 169 int beg = (start_ - buffer_) + real_start_; | 146 int beg = (start_ - buffer_) + real_start_; |
| 170 int end = (cursor_ - buffer_) + real_start_; | 147 int end = (cursor_ - buffer_) + real_start_; |
| 171 if (FLAG_trace_lexer) { | 148 if (FLAG_trace_lexer) { |
| 172 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); | 149 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); |
| 173 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s); | 150 for (YYCTYPE* s = start_; s != cursor_; s++) printf("%c", (char)*s); |
| 174 printf(".\n"); | 151 printf(".\n"); |
| 175 } | 152 } |
| 176 just_seen_line_terminator_ = false; | 153 just_seen_line_terminator_ = false; |
| 177 sink_->Record(token, beg, end); | 154 sink_->Record(token, beg, end); |
| 178 } | 155 } |
| 179 | 156 |
| 180 uint32_t PushScanner::push(const void *input, int input_size) { | 157 uint32_t PushScanner::push(const void *input, int input_size) { |
| 181 if (FLAG_trace_lexer) { | 158 if (FLAG_trace_lexer) { |
| 182 printf( | 159 printf( |
| 183 "scanner is receiving a new data batch of length %d\n" | 160 "scanner is receiving a new data batch of length %d\n" |
| 184 "scanner continues with saved state_ = %d\n", | 161 "scanner continues with saved state_ = %d\n", |
| 185 input_size, | 162 input_size, |
| 186 state_); | 163 state_); |
| 187 } | 164 } |
| 188 | 165 |
| 189 // Data source is signaling end of file when batch size | 166 // Data source is signaling end of file when batch size |
| 190 // is less than max_fill. This is slightly annoying because | 167 // is less than max_fill. This is slightly annoying because |
| 191 // max_fill is a value that can only be known after re2c does | 168 // max_fill is a value that can only be known after re2c does |
| 192 // its thing. Practically though, max_fill is never bigger than | 169 // its thing. Practically though, max_fill is never bigger than |
| 193 // the longest keyword, so given our grammar, 32 is a safe bet. | 170 // the longest keyword, so given our grammar, 32 is a safe bet. |
| 194 | 171 |
| 195 uint8_t null[64]; | 172 YYCTYPE null[64]; |
| 196 const int max_fill = 32; | 173 const int max_fill = 32; |
| 197 if (input_size < max_fill) { // FIXME: do something about this!!! | 174 if (input_size < max_fill) { // FIXME: do something about this!!! |
| 198 eof_ = true; | 175 eof_ = true; |
| 199 input = null; | 176 input = null; |
| 200 input_size = sizeof(null); | 177 input_size = sizeof(null); |
| 201 memset(null, 0, sizeof(null)); | 178 memset(null, 0, sizeof(null)); |
| 202 } | 179 } |
| 203 | 180 |
| 204 | 181 |
| 205 // When we get here, we have a partially | 182 // When we get here, we have a partially |
| 206 // consumed buffer_ which is in the following state_: | 183 // consumed buffer_ which is in the following state_: |
| 207 // last valid char last valid buffer_ spot | 184 // last valid char last valid buffer_ spot |
| 208 // v v | 185 // v v |
| 209 // +-------------------+-------------+---------------+-------------+----------------------+ | 186 // +-------------------+-------------+---------------+-------------+----------------------+ |
| 210 // ^ ^ ^ ^ ^ ^ | 187 // ^ ^ ^ ^ ^ ^ |
| 211 // buffer_ start_ marker_ cursor_ limit_ buffer_end_ | 188 // buffer_ start_ marker_ cursor_ limit_ buffer_end_ |
| 212 // | 189 // |
| 213 // We need to stretch the buffer_ and concatenate the new chunk of input to it | 190 // We need to stretch the buffer_ and concatenate the new chunk of input to it |
| 214 | 191 |
| 215 size_t used = limit_ - buffer_; | 192 size_t used = limit_ - buffer_; |
| 216 size_t needed = used + input_size; | 193 size_t needed = used + input_size; |
| 217 size_t allocated = buffer_end_ - buffer_; | 194 size_t allocated = buffer_end_ - buffer_; |
| 218 if (allocated < needed) { | 195 if (allocated < needed) { |
| 219 size_t limit__offset = limit_ - buffer_; | 196 size_t limit__offset = limit_ - buffer_; |
| 220 size_t start_offset = start_ - buffer_; | 197 size_t start_offset = start_ - buffer_; |
| 221 size_t marker__offset = marker_ - buffer_; | 198 size_t marker__offset = marker_ - buffer_; |
| 222 size_t cursor__offset = cursor_ - buffer_; | 199 size_t cursor__offset = cursor_ - buffer_; |
| 223 | 200 |
| 224 buffer_ = (uint8_t*)realloc(buffer_, needed); | 201 buffer_ = (YYCTYPE*)realloc(buffer_, needed); |
| 225 buffer_end_ = needed + buffer_; | 202 buffer_end_ = needed + buffer_; |
| 226 | 203 |
| 227 marker_ = marker__offset + buffer_; | 204 marker_ = marker__offset + buffer_; |
| 228 cursor_ = cursor__offset + buffer_; | 205 cursor_ = cursor__offset + buffer_; |
| 229 start_ = buffer_ + start_offset; | 206 start_ = buffer_ + start_offset; |
| 230 limit_ = limit__offset + buffer_; | 207 limit_ = limit__offset + buffer_; |
| 231 } | 208 } |
| 232 memcpy(limit_, input, input_size); | 209 memcpy(limit_, input, input_size); |
| 233 limit_ += input_size; | 210 limit_ += input_size; |
| 234 | 211 |
| (...skipping 228 matching lines...) |
| 463 size_t start_offset = start_ - buffer_; | 440 size_t start_offset = start_ - buffer_; |
| 464 memmove(buffer_, start_, limit_ - start_); | 441 memmove(buffer_, start_, limit_ - start_); |
| 465 marker_ -= start_offset; | 442 marker_ -= start_offset; |
| 466 cursor_ -= start_offset; | 443 cursor_ -= start_offset; |
| 467 limit_ -= start_offset; | 444 limit_ -= start_offset; |
| 468 start_ -= start_offset; | 445 start_ -= start_offset; |
| 469 real_start_ += start_offset; | 446 real_start_ += start_offset; |
| 470 } | 447 } |
| 471 return 0; | 448 return 0; |
| 472 } | 449 } |
| OLD | NEW |