Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(482)

Side by Side Diff: runtime/vm/regexp_parser.h

Issue 683433003: Integrate the Irregexp Regular Expression Engine. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: more comments Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #ifndef VM_REGEXP_PARSER_H_ 5 #ifndef VM_REGEXP_PARSER_H_
6 #define VM_REGEXP_PARSER_H_ 6 #define VM_REGEXP_PARSER_H_
7 7
8 // SNIP 8 #include "vm/allocation.h"
9 #include "vm/growable_array.h"
10 #include "vm/regexp_ast.h"
9 11
10 namespace dart { 12 namespace dart {
11 13
12 // SNIP 14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
15 class RegExpBuilder: public ZoneAllocated {
16 public:
17 RegExpBuilder();
13 18
14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. 19 void AddCharacter(uint16_t character);
15 class RegExpBuilder: public ZoneObject {
16 public:
17 explicit RegExpBuilder(Zone* zone);
18 void AddCharacter(uc16 character);
19 // "Adds" an empty expression. Does nothing except consume a 20 // "Adds" an empty expression. Does nothing except consume a
20 // following quantifier. 21 // following quantifier.
21 void AddEmpty(); 22 void AddEmpty();
22 void AddAtom(RegExpTree* tree); 23 void AddAtom(RegExpTree* tree);
23 void AddAssertion(RegExpTree* tree); 24 void AddAssertion(RegExpTree* tree);
24 void NewAlternative(); // '|' 25 void NewAlternative(); // '|'
25 void AddQuantifierToAtom( 26 void AddQuantifierToAtom(
26 int min, int max, RegExpQuantifier::QuantifierType type); 27 intptr_t min, intptr_t max, RegExpQuantifier::QuantifierType type);
27 RegExpTree* ToRegExp(); 28 RegExpTree* ToRegExp();
28 29
29 private: 30 private:
30 void FlushCharacters(); 31 void FlushCharacters();
31 void FlushText(); 32 void FlushText();
32 void FlushTerms(); 33 void FlushTerms();
33 Zone* zone() const { return zone_; }
34 34
35 Zone* zone_; 35 Isolate* isolate() const { return isolate_; }
36
37 Isolate* isolate_;
36 bool pending_empty_; 38 bool pending_empty_;
37 ZoneList<uc16>* characters_; 39 ZoneGrowableArray<uint16_t>* characters_;
38 BufferedZoneList<RegExpTree, 2> terms_; 40 GrowableArray<RegExpTree*> terms_;
39 BufferedZoneList<RegExpTree, 2> text_; 41 GrowableArray<RegExpTree*> text_;
40 BufferedZoneList<RegExpTree, 2> alternatives_; 42 GrowableArray<RegExpTree*> alternatives_;
41 #ifdef DEBUG 43 #ifdef DEBUG
42 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; 44 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
43 #define LAST(x) last_added_ = x; 45 #define LAST(x) last_added_ = x;
44 #else 46 #else
45 #define LAST(x) 47 #define LAST(x)
46 #endif 48 #endif
47 }; 49 };
48 50
51 class RegExpParser : public ValueObject {
52 public:
53 RegExpParser(const String& in,
54 String* error,
55 bool multiline_mode);
49 56
50 class RegExpParser BASE_EMBEDDED { 57 static bool ParseFunction(ParsedFunction* parsed_function);
51 public:
52 RegExpParser(FlatStringReader* in,
53 Handle<String>* error,
54 bool multiline_mode,
55 Zone* zone);
56 58
57 static bool ParseRegExp(FlatStringReader* input, 59 static bool ParseRegExp(const String& input,
58 bool multiline, 60 bool multiline,
59 RegExpCompileData* result, 61 RegExpCompileData* result);
60 Zone* zone);
61 62
62 RegExpTree* ParsePattern(); 63 RegExpTree* ParsePattern();
63 RegExpTree* ParseDisjunction(); 64 RegExpTree* ParseDisjunction();
64 RegExpTree* ParseGroup(); 65 RegExpTree* ParseGroup();
65 RegExpTree* ParseCharacterClass(); 66 RegExpTree* ParseCharacterClass();
66 67
67 // Parses a {...,...} quantifier and stores the range in the given 68 // Parses a {...,...} quantifier and stores the range in the given
68 // out parameters. 69 // out parameters.
69 bool ParseIntervalQuantifier(int* min_out, int* max_out); 70 bool ParseIntervalQuantifier(intptr_t* min_out, intptr_t* max_out);
70 71
71 // Parses and returns a single escaped character. The character 72 // Parses and returns a single escaped character. The character
72 // must not be 'b' or 'B' since they are usually handled specially. 73 // must not be 'b' or 'B' since they are usually handled specially.
73 uc32 ParseClassCharacterEscape(); 74 uint32_t ParseClassCharacterEscape();
74 75
75 // Checks whether the following is a length-digit hexadecimal number, 76 // Checks whether the following is a length-digit hexadecimal number,
76 // and sets the value if it is. 77 // and sets the value if it is.
77 bool ParseHexEscape(int length, uc32* value); 78 bool ParseHexEscape(intptr_t length, uint32_t* value);
78 79
79 uc32 ParseOctalLiteral(); 80 uint32_t ParseOctalLiteral();
80 81
81 // Tries to parse the input as a back reference. If successful it 82 // Tries to parse the input as a back reference. If successful it
82 // stores the result in the output parameter and returns true. If 83 // stores the result in the output parameter and returns true. If
83 // it fails it will push back the characters read so the same characters 84 // it fails it will push back the characters read so the same characters
84 // can be reparsed. 85 // can be reparsed.
85 bool ParseBackReferenceIndex(int* index_out); 86 bool ParseBackReferenceIndex(intptr_t* index_out);
86 87
87 CharacterRange ParseClassAtom(uc16* char_class); 88 CharacterRange ParseClassAtom(uint16_t* char_class);
88 RegExpTree* ReportError(Vector<const char> message); 89 void ReportError(const char* message);
89 void Advance(); 90 void Advance();
90 void Advance(int dist); 91 void Advance(intptr_t dist);
91 void Reset(int pos); 92 void Reset(intptr_t pos);
92 93
93 // Reports whether the pattern might be used as a literal search string. 94 // Reports whether the pattern might be used as a literal search string.
94 // Only use if the result of the parse is a single atom node. 95 // Only use if the result of the parse is a single atom node.
95 bool simple(); 96 bool simple();
96 bool contains_anchor() { return contains_anchor_; } 97 bool contains_anchor() { return contains_anchor_; }
97 void set_contains_anchor() { contains_anchor_ = true; } 98 void set_contains_anchor() { contains_anchor_ = true; }
98 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } 99 intptr_t captures_started() { return captures_ == NULL ?
99 int position() { return next_pos_ - 1; } 100 0 : captures_->length(); }
101 intptr_t position() { return next_pos_ - 1; }
100 bool failed() { return failed_; } 102 bool failed() { return failed_; }
101 103
102 static const int kMaxCaptures = 1 << 16; 104 static const intptr_t kMaxCaptures = 1 << 16;
103 static const uc32 kEndMarker = (1 << 21); 105 static const uint32_t kEndMarker = (1 << 21);
104 106
105 private: 107 private:
106 enum SubexpressionType { 108 enum SubexpressionType {
107 INITIAL, 109 INITIAL,
108 CAPTURE, // All positive values represent captures. 110 CAPTURE, // All positive values represent captures.
109 POSITIVE_LOOKAHEAD, 111 POSITIVE_LOOKAHEAD,
110 NEGATIVE_LOOKAHEAD, 112 NEGATIVE_LOOKAHEAD,
111 GROUPING 113 GROUPING
112 }; 114 };
113 115
114 class RegExpParserState : public ZoneObject { 116 class RegExpParserState : public ZoneAllocated {
115 public: 117 public:
116 RegExpParserState(RegExpParserState* previous_state, 118 RegExpParserState(RegExpParserState* previous_state,
117 SubexpressionType group_type, 119 SubexpressionType group_type,
118 int disjunction_capture_index, 120 intptr_t disjunction_capture_index,
119 Zone* zone) 121 Isolate *isolate)
120 : previous_state_(previous_state), 122 : previous_state_(previous_state),
121 builder_(new(zone) RegExpBuilder(zone)), 123 builder_(new(isolate) RegExpBuilder()),
122 group_type_(group_type), 124 group_type_(group_type),
123 disjunction_capture_index_(disjunction_capture_index) {} 125 disjunction_capture_index_(disjunction_capture_index) {}
124 // Parser state of containing expression, if any. 126 // Parser state of containing expression, if any.
125 RegExpParserState* previous_state() { return previous_state_; } 127 RegExpParserState* previous_state() { return previous_state_; }
126 bool IsSubexpression() { return previous_state_ != NULL; } 128 bool IsSubexpression() { return previous_state_ != NULL; }
127 // RegExpBuilder building this regexp's AST. 129 // RegExpBuilder building this regexp's AST.
128 RegExpBuilder* builder() { return builder_; } 130 RegExpBuilder* builder() { return builder_; }
129 // Type of regexp being parsed (parenthesized group or entire regexp). 131 // Type of regexp being parsed (parenthesized group or entire regexp).
130 SubexpressionType group_type() { return group_type_; } 132 SubexpressionType group_type() { return group_type_; }
131 // Index in captures array of first capture in this sub-expression, if any. 133 // Index in captures array of first capture in this sub-expression, if any.
132 // Also the capture index of this sub-expression itself, if group_type 134 // Also the capture index of this sub-expression itself, if group_type
133 // is CAPTURE. 135 // is CAPTURE.
134 int capture_index() { return disjunction_capture_index_; } 136 intptr_t capture_index() { return disjunction_capture_index_; }
135 137
136 private: 138 private:
137 // Linked list implementation of stack of states. 139 // Linked list implementation of stack of states.
138 RegExpParserState* previous_state_; 140 RegExpParserState* previous_state_;
139 // Builder for the stored disjunction. 141 // Builder for the stored disjunction.
140 RegExpBuilder* builder_; 142 RegExpBuilder* builder_;
141 // Stored disjunction type (capture, look-ahead or grouping), if any. 143 // Stored disjunction type (capture, look-ahead or grouping), if any.
142 SubexpressionType group_type_; 144 SubexpressionType group_type_;
143 // Stored disjunction's capture index (if any). 145 // Stored disjunction's capture index (if any).
144 int disjunction_capture_index_; 146 intptr_t disjunction_capture_index_;
145 }; 147 };
146 148
147 Isolate* isolate() { return isolate_; } 149 Isolate* isolate() { return isolate_; }
148 Zone* zone() const { return zone_; }
149 150
150 uc32 current() { return current_; } 151 uint32_t current() { return current_; }
151 bool has_more() { return has_more_; } 152 bool has_more() { return has_more_; }
152 bool has_next() { return next_pos_ < in()->length(); } 153 bool has_next() { return next_pos_ < in().Length(); }
153 uc32 Next(); 154 uint32_t Next();
154 FlatStringReader* in() { return in_; } 155 const String& in() { return in_; }
155 void ScanForCaptures(); 156 void ScanForCaptures();
156 157
157 Isolate* isolate_; 158 Isolate* isolate_;
158 Zone* zone_; 159 String* error_;
159 Handle<String>* error_; 160 ZoneGrowableArray<RegExpCapture*>* captures_;
160 ZoneList<RegExpCapture*>* captures_; 161 const String& in_;
161 FlatStringReader* in_; 162 uint32_t current_;
162 uc32 current_; 163 intptr_t next_pos_;
163 int next_pos_;
164 // The capture count is only valid after we have scanned for captures. 164 // The capture count is only valid after we have scanned for captures.
165 int capture_count_; 165 intptr_t capture_count_;
166 bool has_more_; 166 bool has_more_;
167 bool multiline_; 167 bool multiline_;
168 bool simple_; 168 bool simple_;
169 bool contains_anchor_; 169 bool contains_anchor_;
170 bool is_scanned_for_captures_; 170 bool is_scanned_for_captures_;
171 bool failed_; 171 bool failed_;
172 }; 172 };
173 173
174 // SNIP
175
176 } // namespace dart 174 } // namespace dart
177 175
178 #endif // VM_REGEXP_PARSER_H_ 176 #endif // VM_REGEXP_PARSER_H_
OLDNEW
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698