Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(173)

Side by Side Diff: src/regexp/regexp-parser.h

Issue 1618753002: Revert of [regexp] implement character classes for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/regexp-ast.cc ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_
6 #define V8_REGEXP_REGEXP_PARSER_H_ 6 #define V8_REGEXP_REGEXP_PARSER_H_
7 7
8 #include "src/objects.h" 8 #include "src/objects.h"
9 #include "src/regexp/regexp-ast.h" 9 #include "src/regexp/regexp-ast.h"
10 #include "src/zone.h" 10 #include "src/zone.h"
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 92
93 private: 93 private:
94 ZoneList<T*>* list_; 94 ZoneList<T*>* list_;
95 T* last_; 95 T* last_;
96 }; 96 };
97 97
98 98
99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. 99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
100 class RegExpBuilder : public ZoneObject { 100 class RegExpBuilder : public ZoneObject {
101 public: 101 public:
102 RegExpBuilder(Zone* zone, JSRegExp::Flags flags); 102 explicit RegExpBuilder(Zone* zone);
103 void AddCharacter(uc16 character); 103 void AddCharacter(uc16 character);
104 void AddUnicodeCharacter(uc32 character); 104 void AddUnicodeCharacter(uc32 character);
105 // "Adds" an empty expression. Does nothing except consume a 105 // "Adds" an empty expression. Does nothing except consume a
106 // following quantifier 106 // following quantifier
107 void AddEmpty(); 107 void AddEmpty();
108 void AddCharacterClass(RegExpCharacterClass* cc);
109 void AddAtom(RegExpTree* tree); 108 void AddAtom(RegExpTree* tree);
110 void AddTerm(RegExpTree* tree);
111 void AddAssertion(RegExpTree* tree); 109 void AddAssertion(RegExpTree* tree);
112 void NewAlternative(); // '|' 110 void NewAlternative(); // '|'
113 void AddQuantifierToAtom(int min, int max, 111 void AddQuantifierToAtom(int min, int max,
114 RegExpQuantifier::QuantifierType type); 112 RegExpQuantifier::QuantifierType type);
115 RegExpTree* ToRegExp(); 113 RegExpTree* ToRegExp();
116 114
117 private: 115 private:
118 static const uc16 kNoPendingSurrogate = 0;
119 void AddLeadSurrogate(uc16 lead_surrogate);
120 void AddTrailSurrogate(uc16 trail_surrogate);
121 void FlushPendingSurrogate();
122 void FlushCharacters(); 116 void FlushCharacters();
123 void FlushText(); 117 void FlushText();
124 void FlushTerms(); 118 void FlushTerms();
125 Zone* zone() const { return zone_; } 119 Zone* zone() const { return zone_; }
126 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
127 120
128 Zone* zone_; 121 Zone* zone_;
129 bool pending_empty_; 122 bool pending_empty_;
130 JSRegExp::Flags flags_;
131 ZoneList<uc16>* characters_; 123 ZoneList<uc16>* characters_;
132 uc16 pending_surrogate_;
133 BufferedZoneList<RegExpTree, 2> terms_; 124 BufferedZoneList<RegExpTree, 2> terms_;
134 BufferedZoneList<RegExpTree, 2> text_; 125 BufferedZoneList<RegExpTree, 2> text_;
135 BufferedZoneList<RegExpTree, 2> alternatives_; 126 BufferedZoneList<RegExpTree, 2> alternatives_;
136 #ifdef DEBUG 127 #ifdef DEBUG
137 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; 128 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_;
138 #define LAST(x) last_added_ = x; 129 #define LAST(x) last_added_ = x;
139 #else 130 #else
140 #define LAST(x) 131 #define LAST(x)
141 #endif 132 #endif
142 }; 133 };
143 134
144 135
145 class RegExpParser BASE_EMBEDDED { 136 class RegExpParser BASE_EMBEDDED {
146 public: 137 public:
147 RegExpParser(FlatStringReader* in, Handle<String>* error, 138 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode,
148 JSRegExp::Flags flags, Isolate* isolate, Zone* zone); 139 bool unicode, Isolate* isolate, Zone* zone);
149 140
150 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, 141 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
151 JSRegExp::Flags flags, RegExpCompileData* result); 142 bool multiline, bool unicode,
143 RegExpCompileData* result);
152 144
153 RegExpTree* ParsePattern(); 145 RegExpTree* ParsePattern();
154 RegExpTree* ParseDisjunction(); 146 RegExpTree* ParseDisjunction();
155 RegExpTree* ParseGroup(); 147 RegExpTree* ParseGroup();
156 RegExpTree* ParseCharacterClass(); 148 RegExpTree* ParseCharacterClass();
157 149
158 // Parses a {...,...} quantifier and stores the range in the given 150 // Parses a {...,...} quantifier and stores the range in the given
159 // out parameters. 151 // out parameters.
160 bool ParseIntervalQuantifier(int* min_out, int* max_out); 152 bool ParseIntervalQuantifier(int* min_out, int* max_out);
161 153
(...skipping 22 matching lines...) Expand all
184 void Reset(int pos); 176 void Reset(int pos);
185 177
186 // Reports whether the pattern might be used as a literal search string. 178 // Reports whether the pattern might be used as a literal search string.
187 // Only use if the result of the parse is a single atom node. 179 // Only use if the result of the parse is a single atom node.
188 bool simple(); 180 bool simple();
189 bool contains_anchor() { return contains_anchor_; } 181 bool contains_anchor() { return contains_anchor_; }
190 void set_contains_anchor() { contains_anchor_ = true; } 182 void set_contains_anchor() { contains_anchor_ = true; }
191 int captures_started() { return captures_started_; } 183 int captures_started() { return captures_started_; }
192 int position() { return next_pos_ - 1; } 184 int position() { return next_pos_ - 1; }
193 bool failed() { return failed_; } 185 bool failed() { return failed_; }
194 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
195 bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; }
196 186
197 static bool IsSyntaxCharacter(uc32 c); 187 static bool IsSyntaxCharacter(uc32 c);
198 188
199 static const int kMaxCaptures = 1 << 16; 189 static const int kMaxCaptures = 1 << 16;
200 static const uc32 kEndMarker = (1 << 21); 190 static const uc32 kEndMarker = (1 << 21);
201 191
202 private: 192 private:
203 enum SubexpressionType { 193 enum SubexpressionType {
204 INITIAL, 194 INITIAL,
205 CAPTURE, // All positive values represent captures. 195 CAPTURE, // All positive values represent captures.
206 POSITIVE_LOOKAROUND, 196 POSITIVE_LOOKAROUND,
207 NEGATIVE_LOOKAROUND, 197 NEGATIVE_LOOKAROUND,
208 GROUPING 198 GROUPING
209 }; 199 };
210 200
211 class RegExpParserState : public ZoneObject { 201 class RegExpParserState : public ZoneObject {
212 public: 202 public:
213 RegExpParserState(RegExpParserState* previous_state, 203 RegExpParserState(RegExpParserState* previous_state,
214 SubexpressionType group_type, 204 SubexpressionType group_type,
215 RegExpLookaround::Type lookaround_type, 205 RegExpLookaround::Type lookaround_type,
216 int disjunction_capture_index, JSRegExp::Flags flags, 206 int disjunction_capture_index, Zone* zone)
217 Zone* zone)
218 : previous_state_(previous_state), 207 : previous_state_(previous_state),
219 builder_(new (zone) RegExpBuilder(zone, flags)), 208 builder_(new (zone) RegExpBuilder(zone)),
220 group_type_(group_type), 209 group_type_(group_type),
221 lookaround_type_(lookaround_type), 210 lookaround_type_(lookaround_type),
222 disjunction_capture_index_(disjunction_capture_index) {} 211 disjunction_capture_index_(disjunction_capture_index) {}
223 // Parser state of containing expression, if any. 212 // Parser state of containing expression, if any.
224 RegExpParserState* previous_state() { return previous_state_; } 213 RegExpParserState* previous_state() { return previous_state_; }
225 bool IsSubexpression() { return previous_state_ != NULL; } 214 bool IsSubexpression() { return previous_state_ != NULL; }
226 // RegExpBuilder building this regexp's AST. 215 // RegExpBuilder building this regexp's AST.
227 RegExpBuilder* builder() { return builder_; } 216 RegExpBuilder* builder() { return builder_; }
228 // Type of regexp being parsed (parenthesized group or entire regexp). 217 // Type of regexp being parsed (parenthesized group or entire regexp).
229 SubexpressionType group_type() { return group_type_; } 218 SubexpressionType group_type() { return group_type_; }
(...skipping 23 matching lines...) Expand all
253 // Return the 1-indexed RegExpCapture object, allocate if necessary. 242 // Return the 1-indexed RegExpCapture object, allocate if necessary.
254 RegExpCapture* GetCapture(int index); 243 RegExpCapture* GetCapture(int index);
255 244
256 Isolate* isolate() { return isolate_; } 245 Isolate* isolate() { return isolate_; }
257 Zone* zone() const { return zone_; } 246 Zone* zone() const { return zone_; }
258 247
259 uc32 current() { return current_; } 248 uc32 current() { return current_; }
260 bool has_more() { return has_more_; } 249 bool has_more() { return has_more_; }
261 bool has_next() { return next_pos_ < in()->length(); } 250 bool has_next() { return next_pos_ < in()->length(); }
262 uc32 Next(); 251 uc32 Next();
263 template <bool update_position>
264 uc32 ReadNext();
265 FlatStringReader* in() { return in_; } 252 FlatStringReader* in() { return in_; }
266 void ScanForCaptures(); 253 void ScanForCaptures();
267 254
268 Isolate* isolate_; 255 Isolate* isolate_;
269 Zone* zone_; 256 Zone* zone_;
270 Handle<String>* error_; 257 Handle<String>* error_;
271 ZoneList<RegExpCapture*>* captures_; 258 ZoneList<RegExpCapture*>* captures_;
272 FlatStringReader* in_; 259 FlatStringReader* in_;
273 uc32 current_; 260 uc32 current_;
274 JSRegExp::Flags flags_;
275 int next_pos_; 261 int next_pos_;
276 int captures_started_; 262 int captures_started_;
277 // The capture count is only valid after we have scanned for captures. 263 // The capture count is only valid after we have scanned for captures.
278 int capture_count_; 264 int capture_count_;
279 bool has_more_; 265 bool has_more_;
266 bool multiline_;
267 bool unicode_;
280 bool simple_; 268 bool simple_;
281 bool contains_anchor_; 269 bool contains_anchor_;
282 bool is_scanned_for_captures_; 270 bool is_scanned_for_captures_;
283 bool failed_; 271 bool failed_;
284 }; 272 };
285 273
286 } // namespace internal 274 } // namespace internal
287 } // namespace v8 275 } // namespace v8
288 276
289 #endif // V8_REGEXP_REGEXP_PARSER_H_ 277 #endif // V8_REGEXP_REGEXP_PARSER_H_
OLD NEW
« no previous file with comments | « src/regexp/regexp-ast.cc ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698