Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(296)

Side by Side Diff: runtime/vm/regexp_parser.h

Issue 754383002: Revert "Integrate the Irregexp Regular Expression Engine." (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #ifndef VM_REGEXP_PARSER_H_ 5 #ifndef VM_REGEXP_PARSER_H_
6 #define VM_REGEXP_PARSER_H_ 6 #define VM_REGEXP_PARSER_H_
7 7
8 #include "vm/allocation.h" 8 // SNIP
9 #include "vm/growable_array.h"
10 #include "vm/regexp_ast.h"
11 9
12 namespace dart { 10 namespace dart {
13 11
12 // SNIP
13
14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. 14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
15 class RegExpBuilder: public ZoneAllocated { 15 class RegExpBuilder: public ZoneObject {
16 public: 16 public:
17 RegExpBuilder(); 17 explicit RegExpBuilder(Zone* zone);
18 18 void AddCharacter(uc16 character);
19 void AddCharacter(uint16_t character);
20 // "Adds" an empty expression. Does nothing except consume a 19 // "Adds" an empty expression. Does nothing except consume a
21 // following quantifier 20 // following quantifier
22 void AddEmpty(); 21 void AddEmpty();
23 void AddAtom(RegExpTree* tree); 22 void AddAtom(RegExpTree* tree);
24 void AddAssertion(RegExpTree* tree); 23 void AddAssertion(RegExpTree* tree);
25 void NewAlternative(); // '|' 24 void NewAlternative(); // '|'
26 void AddQuantifierToAtom( 25 void AddQuantifierToAtom(
27 intptr_t min, intptr_t max, RegExpQuantifier::QuantifierType type); 26 int min, int max, RegExpQuantifier::QuantifierType type);
28 RegExpTree* ToRegExp(); 27 RegExpTree* ToRegExp();
29 28
30 private: 29 private:
31 void FlushCharacters(); 30 void FlushCharacters();
32 void FlushText(); 31 void FlushText();
33 void FlushTerms(); 32 void FlushTerms();
33 Zone* zone() const { return zone_; }
34 34
35 Isolate* isolate() const { return isolate_; } 35 Zone* zone_;
36
37 Isolate* isolate_;
38 bool pending_empty_; 36 bool pending_empty_;
39 ZoneGrowableArray<uint16_t>* characters_; 37 ZoneList<uc16>* characters_;
40 GrowableArray<RegExpTree*> terms_; 38 BufferedZoneList<RegExpTree, 2> terms_;
41 GrowableArray<RegExpTree*> text_; 39 BufferedZoneList<RegExpTree, 2> text_;
42 GrowableArray<RegExpTree*> alternatives_; 40 BufferedZoneList<RegExpTree, 2> alternatives_;
43 #ifdef DEBUG 41 #ifdef DEBUG
44 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; 42 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
45 #define LAST(x) last_added_ = x; 43 #define LAST(x) last_added_ = x;
46 #else 44 #else
47 #define LAST(x) 45 #define LAST(x)
48 #endif 46 #endif
49 }; 47 };
50 48
51 class RegExpParser : public ValueObject { 49
50 class RegExpParser BASE_EMBEDDED {
52 public: 51 public:
53 RegExpParser(const String& in, 52 RegExpParser(FlatStringReader* in,
54 String* error, 53 Handle<String>* error,
55 bool multiline_mode); 54 bool multiline_mode,
55 Zone* zone);
56 56
57 static bool ParseFunction(ParsedFunction* parsed_function); 57 static bool ParseRegExp(FlatStringReader* input,
58
59 static bool ParseRegExp(const String& input,
60 bool multiline, 58 bool multiline,
61 RegExpCompileData* result); 59 RegExpCompileData* result,
60 Zone* zone);
62 61
63 RegExpTree* ParsePattern(); 62 RegExpTree* ParsePattern();
64 RegExpTree* ParseDisjunction(); 63 RegExpTree* ParseDisjunction();
65 RegExpTree* ParseGroup(); 64 RegExpTree* ParseGroup();
66 RegExpTree* ParseCharacterClass(); 65 RegExpTree* ParseCharacterClass();
67 66
68 // Parses a {...,...} quantifier and stores the range in the given 67 // Parses a {...,...} quantifier and stores the range in the given
69 // out parameters. 68 // out parameters.
70 bool ParseIntervalQuantifier(intptr_t* min_out, intptr_t* max_out); 69 bool ParseIntervalQuantifier(int* min_out, int* max_out);
71 70
72 // Parses and returns a single escaped character. The character 71 // Parses and returns a single escaped character. The character
73 // must not be 'b' or 'B' since they are usually handled specially. 72 // must not be 'b' or 'B' since they are usually handled specially.
74 uint32_t ParseClassCharacterEscape(); 73 uc32 ParseClassCharacterEscape();
75 74
76 // Checks whether the following is a length-digit hexadecimal number, 75 // Checks whether the following is a length-digit hexadecimal number,
77 // and sets the value if it is. 76 // and sets the value if it is.
78 bool ParseHexEscape(intptr_t length, uint32_t* value); 77 bool ParseHexEscape(int length, uc32* value);
79 78
80 uint32_t ParseOctalLiteral(); 79 uc32 ParseOctalLiteral();
81 80
82 // Tries to parse the input as a back reference. If successful it 81 // Tries to parse the input as a back reference. If successful it
83 // stores the result in the output parameter and returns true. If 82 // stores the result in the output parameter and returns true. If
84 // it fails it will push back the characters read so the same characters 83 // it fails it will push back the characters read so the same characters
85 // can be reparsed. 84 // can be reparsed.
86 bool ParseBackReferenceIndex(intptr_t* index_out); 85 bool ParseBackReferenceIndex(int* index_out);
87 86
88 CharacterRange ParseClassAtom(uint16_t* char_class); 87 CharacterRange ParseClassAtom(uc16* char_class);
89 void ReportError(const char* message); 88 RegExpTree* ReportError(Vector<const char> message);
90 void Advance(); 89 void Advance();
91 void Advance(intptr_t dist); 90 void Advance(int dist);
92 void Reset(intptr_t pos); 91 void Reset(int pos);
93 92
94 // Reports whether the pattern might be used as a literal search string. 93 // Reports whether the pattern might be used as a literal search string.
95 // Only use if the result of the parse is a single atom node. 94 // Only use if the result of the parse is a single atom node.
96 bool simple(); 95 bool simple();
97 bool contains_anchor() { return contains_anchor_; } 96 bool contains_anchor() { return contains_anchor_; }
98 void set_contains_anchor() { contains_anchor_ = true; } 97 void set_contains_anchor() { contains_anchor_ = true; }
99 intptr_t captures_started() { return captures_ == NULL ? 98 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
100 0 : captures_->length(); } 99 int position() { return next_pos_ - 1; }
101 intptr_t position() { return next_pos_ - 1; }
102 bool failed() { return failed_; } 100 bool failed() { return failed_; }
103 101
104 static const intptr_t kMaxCaptures = 1 << 16; 102 static const int kMaxCaptures = 1 << 16;
105 static const uint32_t kEndMarker = (1 << 21); 103 static const uc32 kEndMarker = (1 << 21);
106 104
107 private: 105 private:
108 enum SubexpressionType { 106 enum SubexpressionType {
109 INITIAL, 107 INITIAL,
110 CAPTURE, // All positive values represent captures. 108 CAPTURE, // All positive values represent captures.
111 POSITIVE_LOOKAHEAD, 109 POSITIVE_LOOKAHEAD,
112 NEGATIVE_LOOKAHEAD, 110 NEGATIVE_LOOKAHEAD,
113 GROUPING 111 GROUPING
114 }; 112 };
115 113
116 class RegExpParserState : public ZoneAllocated { 114 class RegExpParserState : public ZoneObject {
117 public: 115 public:
118 RegExpParserState(RegExpParserState* previous_state, 116 RegExpParserState(RegExpParserState* previous_state,
119 SubexpressionType group_type, 117 SubexpressionType group_type,
120 intptr_t disjunction_capture_index, 118 int disjunction_capture_index,
121 Isolate *isolate) 119 Zone* zone)
122 : previous_state_(previous_state), 120 : previous_state_(previous_state),
123 builder_(new(isolate) RegExpBuilder()), 121 builder_(new(zone) RegExpBuilder(zone)),
124 group_type_(group_type), 122 group_type_(group_type),
125 disjunction_capture_index_(disjunction_capture_index) {} 123 disjunction_capture_index_(disjunction_capture_index) {}
126 // Parser state of containing expression, if any. 124 // Parser state of containing expression, if any.
127 RegExpParserState* previous_state() { return previous_state_; } 125 RegExpParserState* previous_state() { return previous_state_; }
128 bool IsSubexpression() { return previous_state_ != NULL; } 126 bool IsSubexpression() { return previous_state_ != NULL; }
129 // RegExpBuilder building this regexp's AST. 127 // RegExpBuilder building this regexp's AST.
130 RegExpBuilder* builder() { return builder_; } 128 RegExpBuilder* builder() { return builder_; }
131 // Type of regexp being parsed (parenthesized group or entire regexp). 129 // Type of regexp being parsed (parenthesized group or entire regexp).
132 SubexpressionType group_type() { return group_type_; } 130 SubexpressionType group_type() { return group_type_; }
133 // Index in captures array of first capture in this sub-expression, if any. 131 // Index in captures array of first capture in this sub-expression, if any.
134 // Also the capture index of this sub-expression itself, if group_type 132 // Also the capture index of this sub-expression itself, if group_type
135 // is CAPTURE. 133 // is CAPTURE.
136 intptr_t capture_index() { return disjunction_capture_index_; } 134 int capture_index() { return disjunction_capture_index_; }
137 135
138 private: 136 private:
139 // Linked list implementation of stack of states. 137 // Linked list implementation of stack of states.
140 RegExpParserState* previous_state_; 138 RegExpParserState* previous_state_;
141 // Builder for the stored disjunction. 139 // Builder for the stored disjunction.
142 RegExpBuilder* builder_; 140 RegExpBuilder* builder_;
143 // Stored disjunction type (capture, look-ahead or grouping), if any. 141 // Stored disjunction type (capture, look-ahead or grouping), if any.
144 SubexpressionType group_type_; 142 SubexpressionType group_type_;
145 // Stored disjunction's capture index (if any). 143 // Stored disjunction's capture index (if any).
146 intptr_t disjunction_capture_index_; 144 int disjunction_capture_index_;
147 }; 145 };
148 146
149 Isolate* isolate() { return isolate_; } 147 Isolate* isolate() { return isolate_; }
148 Zone* zone() const { return zone_; }
150 149
151 uint32_t current() { return current_; } 150 uc32 current() { return current_; }
152 bool has_more() { return has_more_; } 151 bool has_more() { return has_more_; }
153 bool has_next() { return next_pos_ < in().Length(); } 152 bool has_next() { return next_pos_ < in()->length(); }
154 uint32_t Next(); 153 uc32 Next();
155 const String& in() { return in_; } 154 FlatStringReader* in() { return in_; }
156 void ScanForCaptures(); 155 void ScanForCaptures();
157 156
158 Isolate* isolate_; 157 Isolate* isolate_;
159 String* error_; 158 Zone* zone_;
160 ZoneGrowableArray<RegExpCapture*>* captures_; 159 Handle<String>* error_;
161 const String& in_; 160 ZoneList<RegExpCapture*>* captures_;
162 uint32_t current_; 161 FlatStringReader* in_;
163 intptr_t next_pos_; 162 uc32 current_;
163 int next_pos_;
164 // The capture count is only valid after we have scanned for captures. 164 // The capture count is only valid after we have scanned for captures.
165 intptr_t capture_count_; 165 int capture_count_;
166 bool has_more_; 166 bool has_more_;
167 bool multiline_; 167 bool multiline_;
168 bool simple_; 168 bool simple_;
169 bool contains_anchor_; 169 bool contains_anchor_;
170 bool is_scanned_for_captures_; 170 bool is_scanned_for_captures_;
171 bool failed_; 171 bool failed_;
172 }; 172 };
173 173
174 // SNIP
175
174 } // namespace dart 176 } // namespace dart
175 177
176 #endif // VM_REGEXP_PARSER_H_ 178 #endif // VM_REGEXP_PARSER_H_
OLD NEW
« no previous file with comments | « runtime/vm/regexp_ast.cc ('k') | runtime/vm/regexp_parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698