Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(414)

Side by Side Diff: runtime/vm/regexp_parser.h

Issue 678193004: Copy irregexp related code from V8. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: rebase Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #ifndef VM_REGEXP_PARSER_H_
6 #define VM_REGEXP_PARSER_H_
7
8 // SNIP
9
10 namespace dart {
11
12 // SNIP
13
14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
// The class only declares its interface here; the member functions are defined
// elsewhere (presumably regexp_parser.cc), so notes on their behavior below are
// hedged where this header alone cannot confirm them.
15 class RegExpBuilder: public ZoneObject {
16 public:
// Takes the Zone this builder is associated with (presumably stored in zone_
// -- confirm in the constructor definition).
17 explicit RegExpBuilder(Zone* zone);
// Adds a single UTF-16 code unit (uc16) to the expression being built.
18 void AddCharacter(uc16 character);
19 // "Adds" an empty expression. Does nothing except consume a
20 // following quantifier.
21 void AddEmpty();
22 void AddAtom(RegExpTree* tree);
23 void AddAssertion(RegExpTree* tree);
24 void NewAlternative(); // '|'
// Applies a quantifier with bounds [min, max] and the given type; judging by
// the name it targets the most recently added atom -- TODO confirm.
25 void AddQuantifierToAtom(
26 int min, int max, RegExpQuantifier::QuantifierType type);
// Returns the RegExpTree for everything accumulated so far.
27 RegExpTree* ToRegExp();
28
29 private:
// Flush helpers. NOTE(review): presumably these move pending data one level
// up (characters_ -> text_ -> terms_ -> alternatives_); verify against the
// definitions before relying on the exact order.
30 void FlushCharacters();
31 void FlushText();
32 void FlushTerms();
33 Zone* zone() const { return zone_; }
34
35 Zone* zone_;
36 bool pending_empty_;
37 ZoneList<uc16>* characters_;
38 BufferedZoneList<RegExpTree, 2> terms_;
39 BufferedZoneList<RegExpTree, 2> text_;
40 BufferedZoneList<RegExpTree, 2> alternatives_;
// In DEBUG builds the builder tracks the kind of item added last, and LAST(x)
// assigns x to last_added_. In non-DEBUG builds last_added_ does not exist and
// LAST(x) expands to nothing. Being a preprocessor macro, LAST is not scoped
// to this class and stays defined for any code that follows this header.
41 #ifdef DEBUG
42 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
43 #define LAST(x) last_added_ = x;
44 #else
45 #define LAST(x)
46 #endif
47 };
48
49
// Parser for regular expression source text supplied through a
// FlatStringReader. The Parse* members return RegExpTree nodes (or report
// success through a bool plus out-parameters). Only the inline accessors are
// defined in this header; everything else is defined elsewhere.
50 class RegExpParser BASE_EMBEDDED {
51 public:
52 RegExpParser(FlatStringReader* in,
53 Handle<String>* error,
54 bool multiline_mode,
55 Zone* zone);
56
// Static entry point: parses `input` and fills in `result`. The bool return
// presumably signals success -- TODO confirm in the definition.
57 static bool ParseRegExp(FlatStringReader* input,
58 bool multiline,
59 RegExpCompileData* result,
60 Zone* zone);
61
62 RegExpTree* ParsePattern();
63 RegExpTree* ParseDisjunction();
64 RegExpTree* ParseGroup();
65 RegExpTree* ParseCharacterClass();
66
67 // Parses a {...,...} quantifier and stores the range in the given
68 // out parameters.
69 bool ParseIntervalQuantifier(int* min_out, int* max_out);
70
71 // Parses and returns a single escaped character. The character
72 // must not be 'b' or 'B' since they are usually handled specially.
73 uc32 ParseClassCharacterEscape();
74
75 // Checks whether the following is a length-digit hexadecimal number,
76 // and sets the value if it is.
77 bool ParseHexEscape(int length, uc32* value);
78
79 uc32 ParseOctalLiteral();
80
81 // Tries to parse the input as a back reference. If successful it
82 // stores the result in the output parameter and returns true. If
83 // it fails it will push back the characters read so the same characters
84 // can be reparsed.
85 bool ParseBackReferenceIndex(int* index_out);
86
87 CharacterRange ParseClassAtom(uc16* char_class);
// Returns a RegExpTree* so it can be used directly in a return statement of
// the Parse* functions. NOTE(review): presumably records `message` via error_
// and sets failed_ -- confirm in the definition.
88 RegExpTree* ReportError(Vector<const char> message);
89 void Advance();
90 void Advance(int dist);
91 void Reset(int pos);
92
93 // Reports whether the pattern might be used as a literal search string.
94 // Only use if the result of the parse is a single atom node.
95 bool simple();
96 bool contains_anchor() { return contains_anchor_; }
97 void set_contains_anchor() { contains_anchor_ = true; }
// Number of entries in captures_, or 0 while captures_ is still NULL.
98 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
// One less than next_pos_; presumably the index of the character currently
// held in current_ -- TODO confirm against Advance().
99 int position() { return next_pos_ - 1; }
100 bool failed() { return failed_; }
101
// Upper limit on captures: 1 << 16 == 65536.
102 static const int kMaxCaptures = 1 << 16;
// 1 << 21 == 0x200000, which is larger than the highest Unicode code point
// (0x10FFFF), so it cannot collide with a real character value. Presumably
// used as the value of current() once the input is exhausted -- TODO confirm.
103 static const uc32 kEndMarker = (1 << 21);
104
105 private:
106 enum SubexpressionType {
107 INITIAL,
108 CAPTURE, // All positive values represent captures.
109 POSITIVE_LOOKAHEAD,
110 NEGATIVE_LOOKAHEAD,
111 GROUPING
112 };
113
// One entry in a singly linked stack of parser states. Each state points to
// the state of the enclosing expression and gets its own freshly
// zone-allocated RegExpBuilder.
114 class RegExpParserState : public ZoneObject {
115 public:
116 RegExpParserState(RegExpParserState* previous_state,
117 SubexpressionType group_type,
118 int disjunction_capture_index,
119 Zone* zone)
120 : previous_state_(previous_state),
121 builder_(new(zone) RegExpBuilder(zone)),
122 group_type_(group_type),
123 disjunction_capture_index_(disjunction_capture_index) {}
124 // Parser state of containing expression, if any.
125 RegExpParserState* previous_state() { return previous_state_; }
// True when this state has an enclosing state (previous_state_ is non-NULL).
126 bool IsSubexpression() { return previous_state_ != NULL; }
127 // RegExpBuilder building this regexp's AST.
128 RegExpBuilder* builder() { return builder_; }
129 // Type of regexp being parsed (parenthesized group or entire regexp).
130 SubexpressionType group_type() { return group_type_; }
131 // Index in captures array of first capture in this sub-expression, if any.
132 // Also the capture index of this sub-expression itself, if group_type
133 // is CAPTURE.
134 int capture_index() { return disjunction_capture_index_; }
135
136 private:
137 // Linked list implementation of stack of states.
138 RegExpParserState* previous_state_;
139 // Builder for the stored disjunction.
140 RegExpBuilder* builder_;
141 // Stored disjunction type (capture, look-ahead or grouping), if any.
142 SubexpressionType group_type_;
143 // Stored disjunction's capture index (if any).
144 int disjunction_capture_index_;
145 };
146
147 Isolate* isolate() { return isolate_; }
148 Zone* zone() const { return zone_; }
149
150 uc32 current() { return current_; }
151 bool has_more() { return has_more_; }
// True while next_pos_ is still inside the input string.
152 bool has_next() { return next_pos_ < in()->length(); }
153 uc32 Next();
154 FlatStringReader* in() { return in_; }
155 void ScanForCaptures();
156
157 Isolate* isolate_;
158 Zone* zone_;
159 Handle<String>* error_;
// NULL until the first capture is recorded (see captures_started()).
160 ZoneList<RegExpCapture*>* captures_;
161 FlatStringReader* in_;
162 uc32 current_;
163 int next_pos_;
164 // The capture count is only valid after we have scanned for captures.
165 int capture_count_;
166 bool has_more_;
167 bool multiline_;
168 bool simple_;
169 bool contains_anchor_;
170 bool is_scanned_for_captures_;
171 bool failed_;
172 };
173
174 // SNIP
175
176 } // namespace dart
177
178 #endif // VM_REGEXP_PARSER_H_
OLDNEW
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698