OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
170 reinterpret_cast<intptr_t>(backing_store) % sizeof(unsigned))); | 170 reinterpret_cast<intptr_t>(backing_store) % sizeof(unsigned))); |
171 } | 171 } |
172 | 172 |
173 // Read strings written by ParserRecorder::WriteString. | 173 // Read strings written by ParserRecorder::WriteString. |
174 static const char* ReadString(unsigned* start, int* chars); | 174 static const char* ReadString(unsigned* start, int* chars); |
175 | 175 |
176 friend class ScriptData; | 176 friend class ScriptData; |
177 }; | 177 }; |
178 | 178 |
179 | 179 |
180 class Parser { | 180 class ParserApi { |
181 public: | 181 public: |
182 Parser(Handle<Script> script, bool allow_natives_syntax, | |
183 v8::Extension* extension, ParserMode is_pre_parsing, | |
184 ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data); | |
185 virtual ~Parser() { } | |
186 | |
187 // Parses the source code represented by the compilation info and sets its | 182 // Parses the source code represented by the compilation info and sets its |
188 // function literal. Returns false (and deallocates any allocated AST | 183 // function literal. Returns false (and deallocates any allocated AST |
189 // nodes) if parsing failed. | 184 // nodes) if parsing failed. |
190 static bool Parse(CompilationInfo* info); | 185 static bool Parse(CompilationInfo* info); |
191 | 186 |
192 // Generic preparser generating full preparse data. | 187 // Generic preparser generating full preparse data. |
193 static ScriptDataImpl* PreParse(Handle<String> source, | 188 static ScriptDataImpl* PreParse(Handle<String> source, |
194 unibrow::CharacterStream* stream, | 189 unibrow::CharacterStream* stream, |
195 v8::Extension* extension); | 190 v8::Extension* extension); |
196 | 191 |
197 // Preparser that only does preprocessing that makes sense if only used | 192 // Preparser that only does preprocessing that makes sense if only used |
198 // immediately after. | 193 // immediately after. |
199 static ScriptDataImpl* PartialPreParse(Handle<String> source, | 194 static ScriptDataImpl* PartialPreParse(Handle<String> source, |
200 unibrow::CharacterStream* stream, | 195 unibrow::CharacterStream* stream, |
201 v8::Extension* extension); | 196 v8::Extension* extension); |
| 197 }; |
| 198 |
| 199 |
| 200 // A BufferedZoneList is an automatically growing list, just like (and backed |
| 201 // by) a ZoneList, that is optimized for the case of adding and removing |
| 202 // a single element. The last element added is stored outside the backing list, |
| 203 // and if no more than one element is ever added, the ZoneList isn't even |
| 204 // allocated. |
| 205 // Elements must not be NULL pointers. |
| 206 template <typename T, int initial_size> |
| 207 class BufferedZoneList { |
| 208 public: |
| 209 BufferedZoneList() : list_(NULL), last_(NULL) {} |
| 210 |
| 211 // Adds element at end of list. This element is buffered and can |
| 212 // be read using last() or removed using RemoveLast until a new Add or until |
| 213 // RemoveLast or GetList has been called. |
| 214 void Add(T* value) { |
| 215 if (last_ != NULL) { |
| 216 if (list_ == NULL) { |
| 217 list_ = new ZoneList<T*>(initial_size); |
| 218 } |
| 219 list_->Add(last_); |
| 220 } |
| 221 last_ = value; |
| 222 } |
| 223 |
| 224 T* last() { |
| 225 ASSERT(last_ != NULL); |
| 226 return last_; |
| 227 } |
| 228 |
| 229 T* RemoveLast() { |
| 230 ASSERT(last_ != NULL); |
| 231 T* result = last_; |
| 232 if ((list_ != NULL) && (list_->length() > 0)) |
| 233 last_ = list_->RemoveLast(); |
| 234 else |
| 235 last_ = NULL; |
| 236 return result; |
| 237 } |
| 238 |
| 239 T* Get(int i) { |
| 240 ASSERT((0 <= i) && (i < length())); |
| 241 if (list_ == NULL) { |
| 242 ASSERT_EQ(0, i); |
| 243 return last_; |
| 244 } else { |
| 245 if (i == list_->length()) { |
| 246 ASSERT(last_ != NULL); |
| 247 return last_; |
| 248 } else { |
| 249 return list_->at(i); |
| 250 } |
| 251 } |
| 252 } |
| 253 |
| 254 void Clear() { |
| 255 list_ = NULL; |
| 256 last_ = NULL; |
| 257 } |
| 258 |
| 259 int length() { |
| 260 int length = (list_ == NULL) ? 0 : list_->length(); |
| 261 return length + ((last_ == NULL) ? 0 : 1); |
| 262 } |
| 263 |
| 264 ZoneList<T*>* GetList() { |
| 265 if (list_ == NULL) { |
| 266 list_ = new ZoneList<T*>(initial_size); |
| 267 } |
| 268 if (last_ != NULL) { |
| 269 list_->Add(last_); |
| 270 last_ = NULL; |
| 271 } |
| 272 return list_; |
| 273 } |
| 274 |
| 275 private: |
| 276 ZoneList<T*>* list_; |
| 277 T* last_; |
| 278 }; |
| 279 |
| 280 |
| 281 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 282 class RegExpBuilder: public ZoneObject { |
| 283 public: |
| 284 RegExpBuilder(); |
| 285 void AddCharacter(uc16 character); |
| 286 // "Adds" an empty expression. Does nothing except consume a |
| 287 // following quantifier. |
| 288 void AddEmpty(); |
| 289 void AddAtom(RegExpTree* tree); |
| 290 void AddAssertion(RegExpTree* tree); |
| 291 void NewAlternative(); // '|' |
| 292 void AddQuantifierToAtom(int min, int max, RegExpQuantifier::Type type); |
| 293 RegExpTree* ToRegExp(); |
| 294 |
| 295 private: |
| 296 void FlushCharacters(); |
| 297 void FlushText(); |
| 298 void FlushTerms(); |
| 299 bool pending_empty_; |
| 300 ZoneList<uc16>* characters_; |
| 301 BufferedZoneList<RegExpTree, 2> terms_; |
| 302 BufferedZoneList<RegExpTree, 2> text_; |
| 303 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 304 #ifdef DEBUG |
| 305 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| 306 #define LAST(x) last_added_ = x; |
| 307 #else |
| 308 #define LAST(x) |
| 309 #endif |
| 310 }; |
| 311 |
| 312 |
| 313 class RegExpParser { |
| 314 public: |
| 315 RegExpParser(FlatStringReader* in, |
| 316 Handle<String>* error, |
| 317 bool multiline_mode); |
202 | 318 |
203 static bool ParseRegExp(FlatStringReader* input, | 319 static bool ParseRegExp(FlatStringReader* input, |
204 bool multiline, | 320 bool multiline, |
205 RegExpCompileData* result); | 321 RegExpCompileData* result); |
206 | 322 |
| 323 RegExpTree* ParsePattern(); |
| 324 RegExpTree* ParseDisjunction(); |
| 325 RegExpTree* ParseGroup(); |
| 326 RegExpTree* ParseCharacterClass(); |
| 327 |
| 328 // Parses a {...,...} quantifier and stores the range in the given |
| 329 // out parameters. |
| 330 bool ParseIntervalQuantifier(int* min_out, int* max_out); |
| 331 |
| 332 // Parses and returns a single escaped character. The character |
| 333 // must not be 'b' or 'B' since they are usually handled specially. |
| 334 uc32 ParseClassCharacterEscape(); |
| 335 |
| 336 // Checks whether the following is a length-digit hexadecimal number, |
| 337 // and sets the value if it is. |
| 338 bool ParseHexEscape(int length, uc32* value); |
| 339 |
| 340 uc32 ParseControlLetterEscape(); |
| 341 uc32 ParseOctalLiteral(); |
| 342 |
| 343 // Tries to parse the input as a back reference. If successful it |
| 344 // stores the result in the output parameter and returns true. If |
| 345 // it fails it will push back the characters read so the same characters |
| 346 // can be reparsed. |
| 347 bool ParseBackReferenceIndex(int* index_out); |
| 348 |
| 349 CharacterRange ParseClassAtom(uc16* char_class); |
| 350 RegExpTree* ReportError(Vector<const char> message); |
| 351 void Advance(); |
| 352 void Advance(int dist); |
| 353 void Reset(int pos); |
| 354 |
| 355 // Reports whether the pattern might be used as a literal search string. |
| 356 // Only use if the result of the parse is a single atom node. |
| 357 bool simple(); |
| 358 bool contains_anchor() { return contains_anchor_; } |
| 359 void set_contains_anchor() { contains_anchor_ = true; } |
| 360 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } |
| 361 int position() { return next_pos_ - 1; } |
| 362 bool failed() { return failed_; } |
| 363 |
| 364 static const int kMaxCaptures = 1 << 16; |
| 365 static const uc32 kEndMarker = (1 << 21); |
| 366 |
| 367 private: |
| 368 enum SubexpressionType { |
| 369 INITIAL, |
| 370 CAPTURE, // All positive values represent captures. |
| 371 POSITIVE_LOOKAHEAD, |
| 372 NEGATIVE_LOOKAHEAD, |
| 373 GROUPING |
| 374 }; |
| 375 |
| 376 class RegExpParserState : public ZoneObject { |
| 377 public: |
| 378 RegExpParserState(RegExpParserState* previous_state, |
| 379 SubexpressionType group_type, |
| 380 int disjunction_capture_index) |
| 381 : previous_state_(previous_state), |
| 382 builder_(new RegExpBuilder()), |
| 383 group_type_(group_type), |
| 384 disjunction_capture_index_(disjunction_capture_index) {} |
| 385 // Parser state of containing expression, if any. |
| 386 RegExpParserState* previous_state() { return previous_state_; } |
| 387 bool IsSubexpression() { return previous_state_ != NULL; } |
| 388 // RegExpBuilder building this regexp's AST. |
| 389 RegExpBuilder* builder() { return builder_; } |
| 390 // Type of regexp being parsed (parenthesized group or entire regexp). |
| 391 SubexpressionType group_type() { return group_type_; } |
| 392 // Index in captures array of first capture in this sub-expression, if any. |
| 393 // Also the capture index of this sub-expression itself, if group_type |
| 394 // is CAPTURE. |
| 395 int capture_index() { return disjunction_capture_index_; } |
| 396 |
| 397 private: |
| 398 // Linked list implementation of stack of states. |
| 399 RegExpParserState* previous_state_; |
| 400 // Builder for the stored disjunction. |
| 401 RegExpBuilder* builder_; |
| 402 // Stored disjunction type (capture, look-ahead or grouping), if any. |
| 403 SubexpressionType group_type_; |
| 404 // Stored disjunction's capture index (if any). |
| 405 int disjunction_capture_index_; |
| 406 }; |
| 407 |
| 408 uc32 current() { return current_; } |
| 409 bool has_more() { return has_more_; } |
| 410 bool has_next() { return next_pos_ < in()->length(); } |
| 411 uc32 Next(); |
| 412 FlatStringReader* in() { return in_; } |
| 413 void ScanForCaptures(); |
| 414 uc32 current_; |
| 415 bool has_more_; |
| 416 bool multiline_; |
| 417 int next_pos_; |
| 418 FlatStringReader* in_; |
| 419 Handle<String>* error_; |
| 420 bool simple_; |
| 421 bool contains_anchor_; |
| 422 ZoneList<RegExpCapture*>* captures_; |
| 423 bool is_scanned_for_captures_; |
| 424 // The capture count is only valid after we have scanned for captures. |
| 425 int capture_count_; |
| 426 bool failed_; |
| 427 }; |
| 428 |
| 429 |
| 430 class Parser { |
| 431 public: |
| 432 Parser(Handle<Script> script, bool allow_natives_syntax, |
| 433 v8::Extension* extension, ParserMode is_pre_parsing, |
| 434 ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data); |
| 435 virtual ~Parser() { } |
| 436 |
207 // Pre-parse the program from the character stream; returns true on | 437 // Pre-parse the program from the character stream; returns true on |
208 // success, false if a stack-overflow happened during parsing. | 438 // success, false if a stack-overflow happened during parsing. |
209 bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream); | 439 bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream); |
210 | 440 |
211 void ReportMessage(const char* message, Vector<const char*> args); | 441 void ReportMessage(const char* message, Vector<const char*> args); |
212 virtual void ReportMessageAt(Scanner::Location loc, | 442 virtual void ReportMessageAt(Scanner::Location loc, |
213 const char* message, | 443 const char* message, |
214 Vector<const char*> args) = 0; | 444 Vector<const char*> args) = 0; |
215 | 445 |
216 | 446 |
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
402 // Generate AST node that throws a TypeError with the given | 632 // Generate AST node that throws a TypeError with the given |
403 // type. Both arguments must be non-null (in the handle sense). | 633 // type. Both arguments must be non-null (in the handle sense). |
404 Expression* NewThrowTypeError(Handle<String> type, | 634 Expression* NewThrowTypeError(Handle<String> type, |
405 Handle<Object> first, | 635 Handle<Object> first, |
406 Handle<Object> second); | 636 Handle<Object> second); |
407 | 637 |
408 // Generic AST generator for throwing errors from compiled code. | 638 // Generic AST generator for throwing errors from compiled code. |
409 Expression* NewThrowError(Handle<String> constructor, | 639 Expression* NewThrowError(Handle<String> constructor, |
410 Handle<String> type, | 640 Handle<String> type, |
411 Vector< Handle<Object> > arguments); | 641 Vector< Handle<Object> > arguments); |
412 | |
413 friend class Target; | |
414 friend class TargetScope; | |
415 friend class LexicalScope; | |
416 friend class TemporaryScope; | |
417 }; | 642 }; |
418 | 643 |
419 | 644 |
420 // Support for handling complex values (array and object literals) that | 645 // Support for handling complex values (array and object literals) that |
421 // can be fully handled at compile time. | 646 // can be fully handled at compile time. |
422 class CompileTimeValue: public AllStatic { | 647 class CompileTimeValue: public AllStatic { |
423 public: | 648 public: |
424 enum Type { | 649 enum Type { |
425 OBJECT_LITERAL_FAST_ELEMENTS, | 650 OBJECT_LITERAL_FAST_ELEMENTS, |
426 OBJECT_LITERAL_SLOW_ELEMENTS, | 651 OBJECT_LITERAL_SLOW_ELEMENTS, |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
487 // return a null handle. Primarily for readability. | 712 // return a null handle. Primarily for readability. |
488 Handle<Object> ReportUnexpectedToken() { return Handle<Object>::null(); } | 713 Handle<Object> ReportUnexpectedToken() { return Handle<Object>::null(); } |
489 // Converts the currently parsed literal to a JavaScript String. | 714 // Converts the currently parsed literal to a JavaScript String. |
490 Handle<String> GetString(); | 715 Handle<String> GetString(); |
491 | 716 |
492 Scanner scanner_; | 717 Scanner scanner_; |
493 }; | 718 }; |
494 } } // namespace v8::internal | 719 } } // namespace v8::internal |
495 | 720 |
496 #endif // V8_PARSER_H_ | 721 #endif // V8_PARSER_H_ |
OLD | NEW |