Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(317)

Unified Diff: runtime/vm/regexp_parser.h

Issue 678193004: Copy irregexp related code from V8. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: rebase Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/vm/regexp_parser.h
diff --git a/runtime/vm/regexp_parser.h b/runtime/vm/regexp_parser.h
new file mode 100644
index 0000000000000000000000000000000000000000..42cedd4d3469e01eba9ceee433280c6d063d7e91
--- /dev/null
+++ b/runtime/vm/regexp_parser.h
@@ -0,0 +1,178 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+#ifndef VM_REGEXP_PARSER_H_
+#define VM_REGEXP_PARSER_H_
+
+// SNIP
+
+namespace dart {
+
+// SNIP
+
+// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
+class RegExpBuilder: public ZoneObject {
+ public:
+ explicit RegExpBuilder(Zone* zone);
+ void AddCharacter(uc16 character);
+ // "Adds" an empty expression. Does nothing except consume a
+ // following quantifier
+ void AddEmpty();
+ void AddAtom(RegExpTree* tree);
+ void AddAssertion(RegExpTree* tree);
+ void NewAlternative(); // '|'
+ void AddQuantifierToAtom(
+ int min, int max, RegExpQuantifier::QuantifierType type);
+ RegExpTree* ToRegExp();
+
+ private:
+ void FlushCharacters();
+ void FlushText();
+ void FlushTerms();
+ Zone* zone() const { return zone_; }
+
+ Zone* zone_;
+ bool pending_empty_;
+ ZoneList<uc16>* characters_;
+ BufferedZoneList<RegExpTree, 2> terms_;
+ BufferedZoneList<RegExpTree, 2> text_;
+ BufferedZoneList<RegExpTree, 2> alternatives_;
+#ifdef DEBUG
+ enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
+#define LAST(x) last_added_ = x;
+#else
+#define LAST(x)
+#endif
+};
+
+
+class RegExpParser BASE_EMBEDDED {
+ public:
+ RegExpParser(FlatStringReader* in,
+ Handle<String>* error,
+ bool multiline_mode,
+ Zone* zone);
+
+ static bool ParseRegExp(FlatStringReader* input,
+ bool multiline,
+ RegExpCompileData* result,
+ Zone* zone);
+
+ RegExpTree* ParsePattern();
+ RegExpTree* ParseDisjunction();
+ RegExpTree* ParseGroup();
+ RegExpTree* ParseCharacterClass();
+
+ // Parses a {...,...} quantifier and stores the range in the given
+ // out parameters.
+ bool ParseIntervalQuantifier(int* min_out, int* max_out);
+
+ // Parses and returns a single escaped character. The character
+ // must not be 'b' or 'B' since they are usually handle specially.
+ uc32 ParseClassCharacterEscape();
+
+ // Checks whether the following is a length-digit hexadecimal number,
+ // and sets the value if it is.
+ bool ParseHexEscape(int length, uc32* value);
+
+ uc32 ParseOctalLiteral();
+
+ // Tries to parse the input as a back reference. If successful it
+ // stores the result in the output parameter and returns true. If
+ // it fails it will push back the characters read so the same characters
+ // can be reparsed.
+ bool ParseBackReferenceIndex(int* index_out);
+
+ CharacterRange ParseClassAtom(uc16* char_class);
+ RegExpTree* ReportError(Vector<const char> message);
+ void Advance();
+ void Advance(int dist);
+ void Reset(int pos);
+
+ // Reports whether the pattern might be used as a literal search string.
+ // Only use if the result of the parse is a single atom node.
+ bool simple();
+ bool contains_anchor() { return contains_anchor_; }
+ void set_contains_anchor() { contains_anchor_ = true; }
+ int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
+ int position() { return next_pos_ - 1; }
+ bool failed() { return failed_; }
+
+ static const int kMaxCaptures = 1 << 16;
+ static const uc32 kEndMarker = (1 << 21);
+
+ private:
+ enum SubexpressionType {
+ INITIAL,
+ CAPTURE, // All positive values represent captures.
+ POSITIVE_LOOKAHEAD,
+ NEGATIVE_LOOKAHEAD,
+ GROUPING
+ };
+
+ class RegExpParserState : public ZoneObject {
+ public:
+ RegExpParserState(RegExpParserState* previous_state,
+ SubexpressionType group_type,
+ int disjunction_capture_index,
+ Zone* zone)
+ : previous_state_(previous_state),
+ builder_(new(zone) RegExpBuilder(zone)),
+ group_type_(group_type),
+ disjunction_capture_index_(disjunction_capture_index) {}
+ // Parser state of containing expression, if any.
+ RegExpParserState* previous_state() { return previous_state_; }
+ bool IsSubexpression() { return previous_state_ != NULL; }
+ // RegExpBuilder building this regexp's AST.
+ RegExpBuilder* builder() { return builder_; }
+ // Type of regexp being parsed (parenthesized group or entire regexp).
+ SubexpressionType group_type() { return group_type_; }
+ // Index in captures array of first capture in this sub-expression, if any.
+ // Also the capture index of this sub-expression itself, if group_type
+ // is CAPTURE.
+ int capture_index() { return disjunction_capture_index_; }
+
+ private:
+ // Linked list implementation of stack of states.
+ RegExpParserState* previous_state_;
+ // Builder for the stored disjunction.
+ RegExpBuilder* builder_;
+ // Stored disjunction type (capture, look-ahead or grouping), if any.
+ SubexpressionType group_type_;
+ // Stored disjunction's capture index (if any).
+ int disjunction_capture_index_;
+ };
+
+ Isolate* isolate() { return isolate_; }
+ Zone* zone() const { return zone_; }
+
+ uc32 current() { return current_; }
+ bool has_more() { return has_more_; }
+ bool has_next() { return next_pos_ < in()->length(); }
+ uc32 Next();
+ FlatStringReader* in() { return in_; }
+ void ScanForCaptures();
+
+ Isolate* isolate_;
+ Zone* zone_;
+ Handle<String>* error_;
+ ZoneList<RegExpCapture*>* captures_;
+ FlatStringReader* in_;
+ uc32 current_;
+ int next_pos_;
+ // The capture count is only valid after we have scanned for captures.
+ int capture_count_;
+ bool has_more_;
+ bool multiline_;
+ bool simple_;
+ bool contains_anchor_;
+ bool is_scanned_for_captures_;
+ bool failed_;
+};
+
+// SNIP
+
+} // namespace dart
+
+#endif // VM_REGEXP_PARSER_H_
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698