OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 12 matching lines...) Expand all Loading... |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 #ifndef V8_SCANNER_H_ | 28 #ifndef V8_SCANNER_H_ |
29 #define V8_SCANNER_H_ | 29 #define V8_SCANNER_H_ |
30 | 30 |
31 #include "token.h" | 31 #include "token.h" |
32 #include "char-predicates-inl.h" | 32 #include "char-predicates-inl.h" |
| 33 #include "scanner-base.h" |
33 | 34 |
34 namespace v8 { | 35 namespace v8 { |
35 namespace internal { | 36 namespace internal { |
36 | 37 |
37 | 38 |
38 class UTF8Buffer { | 39 class UTF8Buffer { |
39 public: | 40 public: |
40 UTF8Buffer(); | 41 UTF8Buffer(); |
41 ~UTF8Buffer(); | 42 ~UTF8Buffer(); |
42 | 43 |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
135 int end_position); | 136 int end_position); |
136 virtual void PushBack(uc32 ch); | 137 virtual void PushBack(uc32 ch); |
137 virtual uc32 Advance(); | 138 virtual uc32 Advance(); |
138 virtual void SeekForward(int pos); | 139 virtual void SeekForward(int pos); |
139 | 140 |
140 private: | 141 private: |
141 const CharType* raw_data_; // Pointer to the actual array of characters. | 142 const CharType* raw_data_; // Pointer to the actual array of characters. |
142 }; | 143 }; |
143 | 144 |
144 | 145 |
145 class KeywordMatcher { | |
146 // Incrementally recognize keywords. | |
// Characters are fed one at a time through AddChar(); token() reports the | |
// token for the characters added so far. The token starts out as | |
// Token::IDENTIFIER and is reset to it by Fail(). | |
147 // | |
148 // Recognized keywords: | |
149 // break case catch const* continue debugger* default delete do else | |
150 // finally false for function if in instanceof native* new null | |
151 // return switch this throw true try typeof var void while with | |
152 // | |
153 // *: Actually "future reserved keywords". These are the only ones we | |
154 // recognize; the remaining ones are allowed as identifiers. | |
155 public: | |
// Starts in the INITIAL state with token Token::IDENTIFIER and no candidate | |
// keyword (keyword_ is NULL, counter_ is 0, keyword_token_ is Token::ILLEGAL). | |
156 KeywordMatcher() | |
157 : state_(INITIAL), | |
158 token_(Token::IDENTIFIER), | |
159 keyword_(NULL), | |
160 counter_(0), | |
161 keyword_token_(Token::ILLEGAL) {} | |
162 | |
// Returns the token for the characters added so far. | |
163 Token::Value token() { return token_; } | |
164 | |
// Feeds the next input character to Step(). Input is ignored once the | |
// matcher is in the UNMATCHABLE state. | |
165 inline void AddChar(uc32 input) { | |
166 if (state_ != UNMATCHABLE) { | |
167 Step(input); | |
168 } | |
169 } | |
170 | |
// Gives up on matching: the token becomes Token::IDENTIFIER and the state | |
// becomes UNMATCHABLE, so later AddChar() calls no longer reach Step(). | |
171 void Fail() { | |
172 token_ = Token::IDENTIFIER; | |
173 state_ = UNMATCHABLE; | |
174 } | |
175 | |
176 private: | |
// Matcher states. NOTE(review): the letter-named states (C, CA, CO, ...) | |
// presumably stand for the keyword prefix consumed so far; the transitions | |
// live in Step(), which is not defined in this class body - confirm there. | |
177 enum State { | |
178 UNMATCHABLE, | |
179 INITIAL, | |
180 KEYWORD_PREFIX, | |
181 KEYWORD_MATCHED, | |
182 C, | |
183 CA, | |
184 CO, | |
185 CON, | |
186 D, | |
187 DE, | |
188 F, | |
189 I, | |
190 IN, | |
191 N, | |
192 T, | |
193 TH, | |
194 TR, | |
195 V, | |
196 W | |
197 }; | |
198 | |
// Element type of first_states_: a keyword string, a state and a token. | |
199 struct FirstState { | |
200 const char* keyword; | |
201 State state; | |
202 Token::Value token; | |
203 }; | |
204 | |
205 // Range of possible first characters of a keyword. | |
// The length is max - min + 1, i.e. one slot per character in ['b', 'w']. | |
206 static const unsigned int kFirstCharRangeMin = 'b'; | |
207 static const unsigned int kFirstCharRangeMax = 'w'; | |
208 static const unsigned int kFirstCharRangeLength = | |
209 kFirstCharRangeMax - kFirstCharRangeMin + 1; | |
210 // State map for first keyword character range. | |
211 static FirstState first_states_[kFirstCharRangeLength]; | |
212 | |
213 // If input equals keyword's character at position, continue matching keyword | |
214 // from that position. | |
// On a match this switches to KEYWORD_PREFIX, remembers keyword and | |
// token_if_match, sets counter_ to position + 1 and returns true. On a | |
// mismatch nothing is modified and false is returned. | |
215 inline bool MatchKeywordStart(uc32 input, | |
216 const char* keyword, | |
217 int position, | |
218 Token::Value token_if_match) { | |
219 if (input == keyword[position]) { | |
220 state_ = KEYWORD_PREFIX; | |
221 this->keyword_ = keyword; | |
222 this->counter_ = position + 1; | |
223 this->keyword_token_ = token_if_match; | |
224 return true; | |
225 } | |
226 return false; | |
227 } | |
228 | |
229 // If input equals match character, transition to new state and return true. | |
// Otherwise the state is left unchanged and false is returned. | |
230 inline bool MatchState(uc32 input, char match, State new_state) { | |
231 if (input == match) { | |
232 state_ = new_state; | |
233 return true; | |
234 } | |
235 return false; | |
236 } | |
237 | |
// If input equals match, transition to new_state, set the current token to | |
// keyword_token and return true. Otherwise change nothing and return false. | |
238 inline bool MatchKeyword(uc32 input, | |
239 char match, | |
240 State new_state, | |
241 Token::Value keyword_token) { | |
242 if (input != match) { | |
243 return false; | |
244 } | |
245 state_ = new_state; | |
246 token_ = keyword_token; | |
247 return true; | |
248 } | |
249 | |
// Processes one input character; called from AddChar(). Only declared here, | |
// the definition is outside this class body. | |
250 void Step(uc32 input); | |
251 | |
252 // Current state. | |
253 State state_; | |
254 // Token for currently added characters. | |
255 Token::Value token_; | |
256 | |
257 // Matching a specific keyword string (there is only one possible valid | |
258 // keyword with the current prefix). | |
// counter_ is set by MatchKeywordStart() to the index just past the matched | |
// character of keyword_; keyword_token_ holds the token passed alongside it. | |
// NOTE(review): presumably Step() publishes keyword_token_ through token_ | |
// once the whole keyword has matched - confirm in Step(). | |
259 const char* keyword_; | |
260 int counter_; | |
261 Token::Value keyword_token_; | |
262 }; | |
263 | |
264 | |
// Two language choices: JAVASCRIPT and JSON. NOTE(review): the consumers of this enum are not visible in this excerpt - confirm how it is used. | // Two language choices: JAVASCRIPT and JSON. NOTE(review): the consumers of this enum are not visible in this excerpt - confirm how it is used. |
265 enum ParserLanguage { JAVASCRIPT, JSON }; | 146 enum ParserLanguage { JAVASCRIPT, JSON }; |
266 | 147 |
267 | 148 |
268 class Scanner { | 149 class Scanner { |
269 public: | 150 public: |
270 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 151 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
271 | 152 |
272 class LiteralScope { | 153 class LiteralScope { |
273 public: | 154 public: |
274 explicit LiteralScope(Scanner* self); | 155 explicit LiteralScope(Scanner* self); |
(...skipping 237 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
512 bool stack_overflow_; | 393 bool stack_overflow_; |
513 static StaticResource<Utf8Decoder> utf8_decoder_; | 394 static StaticResource<Utf8Decoder> utf8_decoder_; |
514 | 395 |
515 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 396 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
516 uc32 c0_; | 397 uc32 c0_; |
517 }; | 398 }; |
518 | 399 |
519 } } // namespace v8::internal | 400 } } // namespace v8::internal |
520 | 401 |
521 #endif // V8_SCANNER_H_ | 402 #endif // V8_SCANNER_H_ |
OLD | NEW |