Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: src/parsing/parser.h

Issue 1565183002: [regexp] move regexp parser into own files. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: fix test compile Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/objects.cc ('k') | src/parsing/parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_PARSING_PARSER_H_ 5 #ifndef V8_PARSING_PARSER_H_
6 #define V8_PARSING_PARSER_H_ 6 #define V8_PARSING_PARSER_H_
7 7
8 #include "src/allocation.h" 8 #include "src/allocation.h"
9 #include "src/ast/ast.h" 9 #include "src/ast/ast.h"
10 #include "src/ast/scopes.h" 10 #include "src/ast/scopes.h"
(...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after
282 return script_data_->length() / sizeof(unsigned); 282 return script_data_->length() / sizeof(unsigned);
283 } 283 }
284 284
285 ScriptData* script_data_; 285 ScriptData* script_data_;
286 int function_index_; 286 int function_index_;
287 287
288 DISALLOW_COPY_AND_ASSIGN(ParseData); 288 DISALLOW_COPY_AND_ASSIGN(ParseData);
289 }; 289 };
290 290
291 // ---------------------------------------------------------------------------- 291 // ----------------------------------------------------------------------------
292 // REGEXP PARSING
293
294 // A BufferedZoneList is an automatically growing list, just like (and backed
295 // by) a ZoneList, that is optimized for the case of adding and removing
296 // a single element. The last element added is stored outside the backing list,
297 // and if no more than one element is ever added, the ZoneList isn't even
298 // allocated (unless GetList() is called, which creates it when it is missing).
299 // Elements must not be NULL pointers: a NULL last_ marks the buffer slot as empty.
300 template <typename T, int initial_size>  // initial_size is forwarded to the ZoneList constructor.
301 class BufferedZoneList {
302 public:
303 BufferedZoneList() : list_(NULL), last_(NULL) {}  // Starts empty: no backing list, nothing buffered.
304
305 // Adds element at end of list. This element is buffered and can
306 // be read using last() or removed using RemoveLast until a new Add or until
307 // RemoveLast or GetList has been called.
308 void Add(T* value, Zone* zone) {
309 if (last_ != NULL) {  // Spill the previously buffered element into the backing list.
310 if (list_ == NULL) {
311 list_ = new(zone) ZoneList<T*>(initial_size, zone);  // Created on demand.
312 }
313 list_->Add(last_, zone);
314 }
315 last_ = value;
316 }
317
318 T* last() {  // Returns the buffered element; requires that one is buffered.
319 DCHECK(last_ != NULL);
320 return last_;
321 }
322
323 T* RemoveLast() {  // Removes and returns the buffered element.
324 DCHECK(last_ != NULL);
325 T* result = last_;
326 if ((list_ != NULL) && (list_->length() > 0))
327 last_ = list_->RemoveLast();  // Refill the buffer from the end of the backing list.
328 else
329 last_ = NULL;  // Nothing left: the buffer slot becomes empty.
330 return result;
331 }
332
333 T* Get(int i) {  // Returns element i, counting backing-list entries first and the buffered one last.
334 DCHECK((0 <= i) && (i < length()));
335 if (list_ == NULL) {
336 DCHECK_EQ(0, i);  // Without a backing list only the buffered element can exist.
337 return last_;
338 } else {
339 if (i == list_->length()) {  // Index one past the backing list is the buffered element.
340 DCHECK(last_ != NULL);
341 return last_;
342 } else {
343 return list_->at(i);
344 }
345 }
346 }
347
348 void Clear() {  // Resets both pointers to NULL; nothing is deallocated here.
349 list_ = NULL;
350 last_ = NULL;
351 }
352
353 int length() {  // Backing-list entries plus one if an element is buffered.
354 int length = (list_ == NULL) ? 0 : list_->length();
355 return length + ((last_ == NULL) ? 0 : 1);
356 }
357
358 ZoneList<T*>* GetList(Zone* zone) {  // Returns the backing list with the buffered element flushed into it.
359 if (list_ == NULL) {
360 list_ = new(zone) ZoneList<T*>(initial_size, zone);  // Allocated here even when no element was added.
361 }
362 if (last_ != NULL) {
363 list_->Add(last_, zone);
364 last_ = NULL;  // The buffer slot is empty after the flush.
365 }
366 return list_;
367 }
368
369 private:
370 ZoneList<T*>* list_;  // Backing list; NULL until first needed.
371 T* last_;  // Buffered element kept outside list_; NULL when nothing is buffered.
372 };
373
374
375 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
376 class RegExpBuilder: public ZoneObject {
377 public:
378 explicit RegExpBuilder(Zone* zone);
379 void AddCharacter(uc16 character);
380 // "Adds" an empty expression. Does nothing except consume a
381 // following quantifier.
382 void AddEmpty();
383 void AddAtom(RegExpTree* tree);
384 void AddAssertion(RegExpTree* tree);
385 void NewAlternative(); // '|'
386 void AddQuantifierToAtom(
387 int min, int max, RegExpQuantifier::QuantifierType type);
388 RegExpTree* ToRegExp();  // NOTE(review): presumably returns the tree built from everything added so far; the definition is not in this header.
389
390 private:
391 void FlushCharacters();
392 void FlushText();
393 void FlushTerms();
394 Zone* zone() const { return zone_; }
395
396 Zone* zone_;  // Returned by zone().
397 bool pending_empty_;  // NOTE(review): presumably set by AddEmpty(); confirm in the implementation file.
398 ZoneList<uc16>* characters_;
399 BufferedZoneList<RegExpTree, 2> terms_;
400 BufferedZoneList<RegExpTree, 2> text_;
401 BufferedZoneList<RegExpTree, 2> alternatives_;
402 #ifdef DEBUG
403 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;  // Exists only in DEBUG builds; assigned through LAST(x).
404 #define LAST(x) last_added_ = x;
405 #else
406 #define LAST(x)
407 #endif
408 };
409
410
411 class RegExpParser BASE_EMBEDDED {  // Reads a pattern from a FlatStringReader; the Parse* methods return RegExpTree nodes.
412 public:
413 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode,
414 bool unicode, Isolate* isolate, Zone* zone);
415
416 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
417 bool multiline, bool unicode,
418 RegExpCompileData* result);
419
420 RegExpTree* ParsePattern();
421 RegExpTree* ParseDisjunction();
422 RegExpTree* ParseGroup();
423 RegExpTree* ParseCharacterClass();
424
425 // Parses a {...,...} quantifier and stores the range in the given
426 // out parameters.
427 bool ParseIntervalQuantifier(int* min_out, int* max_out);
428
429 // Parses and returns a single escaped character. The character
430 // must not be 'b' or 'B' since they are usually handled specially.
431 uc32 ParseClassCharacterEscape();
432
433 // Checks whether the following is a length-digit hexadecimal number,
434 // and sets the value if it is.
435 bool ParseHexEscape(int length, uc32* value);
436 bool ParseUnicodeEscape(uc32* value);
437 bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
438
439 uc32 ParseOctalLiteral();
440
441 // Tries to parse the input as a back reference. If successful it
442 // stores the result in the output parameter and returns true. If
443 // it fails it will push back the characters read so the same characters
444 // can be reparsed.
445 bool ParseBackReferenceIndex(int* index_out);
446
447 CharacterRange ParseClassAtom(uc16* char_class);
448 RegExpTree* ReportError(Vector<const char> message);
449 void Advance();
450 void Advance(int dist);
451 void Reset(int pos);
452
453 // Reports whether the pattern might be used as a literal search string.
454 // Only use if the result of the parse is a single atom node.
455 bool simple();
456 bool contains_anchor() { return contains_anchor_; }
457 void set_contains_anchor() { contains_anchor_ = true; }  // One-way flag: no accessor in this class resets it.
458 int captures_started() { return captures_started_; }
459 int position() { return next_pos_ - 1; }  // One less than next_pos_; presumably the index of current() - confirm in parser.cc.
460 bool failed() { return failed_; }
461
462 static bool IsSyntaxCharacter(uc32 c);
463
464 static const int kMaxCaptures = 1 << 16;  // 65536.
465 static const uc32 kEndMarker = (1 << 21);  // 0x200000, above the largest Unicode code point (0x10FFFF).
466
467 private:
468 enum SubexpressionType {
469 INITIAL,
470 CAPTURE, // All positive values represent captures.
471 POSITIVE_LOOKAROUND,
472 NEGATIVE_LOOKAROUND,
473 GROUPING
474 };
475
476 class RegExpParserState : public ZoneObject {
477 public:
478 RegExpParserState(RegExpParserState* previous_state,
479 SubexpressionType group_type,
480 RegExpLookaround::Type lookaround_type,
481 int disjunction_capture_index, Zone* zone)
482 : previous_state_(previous_state),
483 builder_(new (zone) RegExpBuilder(zone)),  // Each state gets its own builder, allocated in the given zone.
484 group_type_(group_type),
485 lookaround_type_(lookaround_type),
486 disjunction_capture_index_(disjunction_capture_index) {}
487 // Parser state of containing expression, if any.
488 RegExpParserState* previous_state() { return previous_state_; }
489 bool IsSubexpression() { return previous_state_ != NULL; }  // True when there is an enclosing state.
490 // RegExpBuilder building this regexp's AST.
491 RegExpBuilder* builder() { return builder_; }
492 // Type of regexp being parsed (parenthesized group or entire regexp).
493 SubexpressionType group_type() { return group_type_; }
494 // Lookahead or Lookbehind.
495 RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
496 // Index in captures array of first capture in this sub-expression, if any.
497 // Also the capture index of this sub-expression itself, if group_type
498 // is CAPTURE.
499 int capture_index() { return disjunction_capture_index_; }
500
501 // Check whether the parser is inside a capture group with the given index.
502 bool IsInsideCaptureGroup(int index);
503
504 private:
505 // Linked list implementation of stack of states.
506 RegExpParserState* previous_state_;
507 // Builder for the stored disjunction.
508 RegExpBuilder* builder_;
509 // Stored disjunction type (capture, look-ahead or grouping), if any.
510 SubexpressionType group_type_;
511 // Stored read direction.
512 RegExpLookaround::Type lookaround_type_;
513 // Stored disjunction's capture index (if any).
514 int disjunction_capture_index_;
515 };
516
517 // Return the 1-indexed RegExpCapture object, allocate if necessary.
518 RegExpCapture* GetCapture(int index);
519
520 Isolate* isolate() { return isolate_; }
521 Zone* zone() const { return zone_; }
522
523 uc32 current() { return current_; }
524 bool has_more() { return has_more_; }
525 bool has_next() { return next_pos_ < in()->length(); }  // True while next_pos_ is still inside the input.
526 uc32 Next();
527 FlatStringReader* in() { return in_; }
528 void ScanForCaptures();
529
530 Isolate* isolate_;
531 Zone* zone_;
532 Handle<String>* error_;
533 ZoneList<RegExpCapture*>* captures_;
534 FlatStringReader* in_;
535 uc32 current_;
536 int next_pos_;
537 int captures_started_;
538 // The capture count is only valid after we have scanned for captures.
539 int capture_count_;
540 bool has_more_;
541 bool multiline_;
542 bool unicode_;
543 bool simple_;
544 bool contains_anchor_;
545 bool is_scanned_for_captures_;
546 bool failed_;
547 };
548
549 // ----------------------------------------------------------------------------
550 // JAVASCRIPT PARSING 292 // JAVASCRIPT PARSING
551 293
552 class Parser; 294 class Parser;
553 class SingletonLogger; 295 class SingletonLogger;
554 296
555 297
556 struct ParserFormalParameters : FormalParametersBase { 298 struct ParserFormalParameters : FormalParametersBase {
557 struct Parameter { 299 struct Parameter {
558 Parameter(const AstRawString* name, Expression* pattern, 300 Parameter(const AstRawString* name, Expression* pattern,
559 Expression* initializer, int initializer_end_position, 301 Expression* initializer, int initializer_end_position,
(...skipping 890 matching lines...) Expand 10 before | Expand all | Expand 10 after
1450 1192
1451 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { 1193 DoExpression* ParserTraits::ParseDoExpression(bool* ok) {
1452 return parser_->ParseDoExpression(ok); 1194 return parser_->ParseDoExpression(ok);
1453 } 1195 }
1454 1196
1455 1197
1456 } // namespace internal 1198 } // namespace internal
1457 } // namespace v8 1199 } // namespace v8
1458 1200
1459 #endif // V8_PARSING_PARSER_H_ 1201 #endif // V8_PARSING_PARSER_H_
OLDNEW
« no previous file with comments | « src/objects.cc ('k') | src/parsing/parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698