Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(30)

Side by Side Diff: runtime/vm/regexp_parser.h

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Addressed Ivan's comments. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #ifndef VM_REGEXP_PARSER_H_
6 #define VM_REGEXP_PARSER_H_
7
8 #include "vm/allocation.h"
9 #include "vm/growable_array.h"
10 #include "vm/regexp_ast.h"
11
12 namespace dart {
13
14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
15 class RegExpBuilder: public ZoneAllocated {
16 public:
17 RegExpBuilder();
18
19 void AddCharacter(uint16_t character);  // Takes a single 16-bit code unit.
20 // "Adds" an empty expression. Does nothing except consume a
21 // following quantifier.
22 void AddEmpty();
23 void AddAtom(RegExpTree* tree);
24 void AddAssertion(RegExpTree* tree);
25 void NewAlternative(); // '|'
26 void AddQuantifierToAtom(
27 intptr_t min, intptr_t max, RegExpQuantifier::QuantifierType type);
28 RegExpTree* ToRegExp();  // Presumably the tree for everything added so far; confirm in regexp_parser.cc.
29
30 private:
31 void FlushCharacters();
32 void FlushText();
33 void FlushTerms();
34
35 Isolate* isolate() const { return isolate_; }
36
37 Isolate* isolate_;
38 bool pending_empty_;  // NOTE(review): presumably set by AddEmpty(); confirm in regexp_parser.cc.
39 ZoneGrowableArray<uint16_t>* characters_;  // Held by pointer, unlike the three arrays below.
40 GrowableArray<RegExpTree*> terms_;
41 GrowableArray<RegExpTree*> text_;
42 GrowableArray<RegExpTree*> alternatives_;
43 #ifdef DEBUG  // DEBUG builds only: remember which kind of item was added last.
44 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
45 #define LAST(x) last_added_ = x;  // The expansion already ends in ';'.
46 #else
47 #define LAST(x)  // Expands to nothing outside DEBUG builds.
48 #endif  // NOTE(review): LAST is not #undef'd in this header, so it stays defined for every includer.
49 };
50
51 class RegExpParser : public ValueObject {  // Parser over a pattern String; its Parse* methods return RegExpTree nodes.
52 public:
53 RegExpParser(const String& in,
54 String* error,  // NOTE(review): presumably receives the message on failure; confirm in regexp_parser.cc.
55 bool multiline_mode);
56
57 static bool ParseFunction(ParsedFunction* parsed_function);
58
59 static bool ParseRegExp(const String& input,
60 bool multiline,
61 RegExpCompileData* result);  // Out-parameter; the bool return presumably signals success (confirm).
62
63 RegExpTree* ParsePattern();
64 RegExpTree* ParseDisjunction();
65 RegExpTree* ParseGroup();
66 RegExpTree* ParseCharacterClass();
67
68 // Parses a {...,...} quantifier and stores the range in the given
69 // out parameters.
70 bool ParseIntervalQuantifier(intptr_t* min_out, intptr_t* max_out);
71
72 // Parses and returns a single escaped character. The character
73 // must not be 'b' or 'B' since they are usually handled specially.
74 uint32_t ParseClassCharacterEscape();
75
76 // Checks whether the following is a length-digit hexadecimal number,
77 // and sets the value if it is.
78 bool ParseHexEscape(intptr_t length, uint32_t* value);
79
80 uint32_t ParseOctalLiteral();
81
82 // Tries to parse the input as a back reference. If successful it
83 // stores the result in the output parameter and returns true. If
84 // it fails it will push back the characters read so the same characters
85 // can be reparsed.
86 bool ParseBackReferenceIndex(intptr_t* index_out);
87
88 CharacterRange ParseClassAtom(uint16_t* char_class);
89 void ReportError(const char* message);
90 void Advance();
91 void Advance(intptr_t dist);
92 void Reset(intptr_t pos);
93
94 // Reports whether the pattern might be used as a literal search string.
95 // Only use if the result of the parse is a single atom node.
96 bool simple() { return simple_; }
97 bool contains_anchor() { return contains_anchor_; }
98 void set_contains_anchor() { contains_anchor_ = true; }  // One-way: there is no accessor to clear the flag.
99 intptr_t captures_started() { return captures_ == NULL ?  // A NULL captures_ list counts as zero.
100 0 : captures_->length(); }
101 intptr_t position() { return next_pos_ - 1; }  // One behind next_pos_; presumably the index of current_ (confirm).
102 bool failed() { return failed_; }
103
104 static const intptr_t kMaxCaptures = 1 << 16;  // 65536.
105 static const uint32_t kEndMarker = (1 << 21);  // 0x200000, which is above the last Unicode code point (0x10FFFF).
106
107 private:
108 enum SubexpressionType {
109 INITIAL,
110 CAPTURE, // All positive values represent captures.
111 POSITIVE_LOOKAHEAD,
112 NEGATIVE_LOOKAHEAD,
113 GROUPING
114 };
115
116 class RegExpParserState : public ZoneAllocated {  // One node in the linked stack of parse states (see previous_state_).
117 public:
118 RegExpParserState(RegExpParserState* previous_state,
119 SubexpressionType group_type,
120 intptr_t disjunction_capture_index,
121 Isolate *isolate)
122 : previous_state_(previous_state),
123 builder_(new(isolate) RegExpBuilder()),  // Every state gets its own fresh builder.
124 group_type_(group_type),
125 disjunction_capture_index_(disjunction_capture_index) {}
126 // Parser state of containing expression, if any.
127 RegExpParserState* previous_state() { return previous_state_; }
128 bool IsSubexpression() { return previous_state_ != NULL; }  // True whenever there is an enclosing state.
129 // RegExpBuilder building this regexp's AST.
130 RegExpBuilder* builder() { return builder_; }
131 // Type of regexp being parsed (parenthesized group or entire regexp).
132 SubexpressionType group_type() { return group_type_; }
133 // Index in captures array of first capture in this sub-expression, if any.
134 // Also the capture index of this sub-expression itself, if group_type
135 // is CAPTURE.
136 intptr_t capture_index() { return disjunction_capture_index_; }
137
138 private:
139 // Linked list implementation of stack of states.
140 RegExpParserState* previous_state_;
141 // Builder for the stored disjunction.
142 RegExpBuilder* builder_;
143 // Stored disjunction type (capture, look-ahead or grouping), if any.
144 SubexpressionType group_type_;
145 // Stored disjunction's capture index (if any).
146 intptr_t disjunction_capture_index_;
147 };
148
149 Isolate* isolate() { return isolate_; }
150
151 uint32_t current() { return current_; }
152 bool has_more() { return has_more_; }
153 bool has_next() { return next_pos_ < in().Length(); }  // True while next_pos_ is still inside the input.
154 uint32_t Next();
155 const String& in() { return in_; }
156 void ScanForCaptures();
157
158 Isolate* isolate_;
159 String* error_;
160 ZoneGrowableArray<RegExpCapture*>* captures_;  // May be NULL; captures_started() checks for that.
161 const String& in_;  // Reference member; presumably bound to the constructor's `in`, which must then outlive the parser.
162 uint32_t current_;
163 intptr_t next_pos_;
164 // The capture count is only valid after we have scanned for captures.
165 intptr_t capture_count_;
166 bool has_more_;
167 bool multiline_;
168 bool simple_;
169 bool contains_anchor_;
170 bool is_scanned_for_captures_;
171 bool failed_;
172 };
173
174 } // namespace dart
175
176 #endif // VM_REGEXP_PARSER_H_
OLDNEW
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698