Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: src/regexp/regexp-parser.h

Issue 1651073002: [regexp] do not store flags as bitfield in the parser. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/regexp/regexp-parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_
6 #define V8_REGEXP_REGEXP_PARSER_H_ 6 #define V8_REGEXP_REGEXP_PARSER_H_
7 7
8 #include "src/objects.h" 8 #include "src/objects.h"
9 #include "src/regexp/regexp-ast.h" 9 #include "src/regexp/regexp-ast.h"
10 #include "src/zone.h" 10 #include "src/zone.h"
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 92
93 private: 93 private:
94 ZoneList<T*>* list_; 94 ZoneList<T*>* list_;
95 T* last_; 95 T* last_;
96 }; 96 };
97 97
98 98
99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. 99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
100 class RegExpBuilder : public ZoneObject { 100 class RegExpBuilder : public ZoneObject {
101 public: 101 public:
102 RegExpBuilder(Zone* zone, JSRegExp::Flags flags); 102 RegExpBuilder(Zone* zone, bool ignore_case, bool unicode);
103 void AddCharacter(uc16 character); 103 void AddCharacter(uc16 character);
104 void AddUnicodeCharacter(uc32 character); 104 void AddUnicodeCharacter(uc32 character);
105 // "Adds" an empty expression. Does nothing except consume a 105 // "Adds" an empty expression. Does nothing except consume a
106 // following quantifier 106 // following quantifier
107 void AddEmpty(); 107 void AddEmpty();
108 void AddCharacterClass(RegExpCharacterClass* cc); 108 void AddCharacterClass(RegExpCharacterClass* cc);
109 void AddCharacterClassForDesugaring(uc32 c); 109 void AddCharacterClassForDesugaring(uc32 c);
110 void AddAtom(RegExpTree* tree); 110 void AddAtom(RegExpTree* tree);
111 void AddTerm(RegExpTree* tree); 111 void AddTerm(RegExpTree* tree);
112 void AddAssertion(RegExpTree* tree); 112 void AddAssertion(RegExpTree* tree);
113 void NewAlternative(); // '|' 113 void NewAlternative(); // '|'
114 bool AddQuantifierToAtom(int min, int max, 114 bool AddQuantifierToAtom(int min, int max,
115 RegExpQuantifier::QuantifierType type); 115 RegExpQuantifier::QuantifierType type);
116 RegExpTree* ToRegExp(); 116 RegExpTree* ToRegExp();
117 117
118 private: 118 private:
119 static const uc16 kNoPendingSurrogate = 0; 119 static const uc16 kNoPendingSurrogate = 0;
120 void AddLeadSurrogate(uc16 lead_surrogate); 120 void AddLeadSurrogate(uc16 lead_surrogate);
121 void AddTrailSurrogate(uc16 trail_surrogate); 121 void AddTrailSurrogate(uc16 trail_surrogate);
122 void FlushPendingSurrogate(); 122 void FlushPendingSurrogate();
123 void FlushCharacters(); 123 void FlushCharacters();
124 void FlushText(); 124 void FlushText();
125 void FlushTerms(); 125 void FlushTerms();
126 bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc); 126 bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
127 bool NeedsDesugaringForIgnoreCase(uc32 c); 127 bool NeedsDesugaringForIgnoreCase(uc32 c);
128 Zone* zone() const { return zone_; } 128 Zone* zone() const { return zone_; }
129 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } 129 bool ignore_case() const { return ignore_case_; }
130 bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; } 130 bool unicode() const { return unicode_; }
131 131
132 Zone* zone_; 132 Zone* zone_;
133 bool pending_empty_; 133 bool pending_empty_;
134 JSRegExp::Flags flags_; 134 bool ignore_case_;
135 bool unicode_;
135 ZoneList<uc16>* characters_; 136 ZoneList<uc16>* characters_;
136 uc16 pending_surrogate_; 137 uc16 pending_surrogate_;
137 BufferedZoneList<RegExpTree, 2> terms_; 138 BufferedZoneList<RegExpTree, 2> terms_;
138 BufferedZoneList<RegExpTree, 2> text_; 139 BufferedZoneList<RegExpTree, 2> text_;
139 BufferedZoneList<RegExpTree, 2> alternatives_; 140 BufferedZoneList<RegExpTree, 2> alternatives_;
140 #ifdef DEBUG 141 #ifdef DEBUG
141 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; 142 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_;
142 #define LAST(x) last_added_ = x; 143 #define LAST(x) last_added_ = x;
143 #else 144 #else
144 #define LAST(x) 145 #define LAST(x)
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
188 void Reset(int pos); 189 void Reset(int pos);
189 190
190 // Reports whether the pattern might be used as a literal search string. 191 // Reports whether the pattern might be used as a literal search string.
191 // Only use if the result of the parse is a single atom node. 192 // Only use if the result of the parse is a single atom node.
192 bool simple(); 193 bool simple();
193 bool contains_anchor() { return contains_anchor_; } 194 bool contains_anchor() { return contains_anchor_; }
194 void set_contains_anchor() { contains_anchor_ = true; } 195 void set_contains_anchor() { contains_anchor_ = true; }
195 int captures_started() { return captures_started_; } 196 int captures_started() { return captures_started_; }
196 int position() { return next_pos_ - 1; } 197 int position() { return next_pos_ - 1; }
197 bool failed() { return failed_; } 198 bool failed() { return failed_; }
198 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } 199 bool ignore_case() const { return ignore_case_; }
199 bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; } 200 bool multiline() const { return multiline_; }
201 bool unicode() const { return unicode_; }
200 202
201 static bool IsSyntaxCharacterOrSlash(uc32 c); 203 static bool IsSyntaxCharacterOrSlash(uc32 c);
202 204
203 static const int kMaxCaptures = 1 << 16; 205 static const int kMaxCaptures = 1 << 16;
204 static const uc32 kEndMarker = (1 << 21); 206 static const uc32 kEndMarker = (1 << 21);
205 207
206 private: 208 private:
207 enum SubexpressionType { 209 enum SubexpressionType {
208 INITIAL, 210 INITIAL,
209 CAPTURE, // All positive values represent captures. 211 CAPTURE, // All positive values represent captures.
210 POSITIVE_LOOKAROUND, 212 POSITIVE_LOOKAROUND,
211 NEGATIVE_LOOKAROUND, 213 NEGATIVE_LOOKAROUND,
212 GROUPING 214 GROUPING
213 }; 215 };
214 216
215 class RegExpParserState : public ZoneObject { 217 class RegExpParserState : public ZoneObject {
216 public: 218 public:
217 RegExpParserState(RegExpParserState* previous_state, 219 RegExpParserState(RegExpParserState* previous_state,
218 SubexpressionType group_type, 220 SubexpressionType group_type,
219 RegExpLookaround::Type lookaround_type, 221 RegExpLookaround::Type lookaround_type,
220 int disjunction_capture_index, JSRegExp::Flags flags, 222 int disjunction_capture_index, bool ignore_case,
221 Zone* zone) 223 bool unicode, Zone* zone)
222 : previous_state_(previous_state), 224 : previous_state_(previous_state),
223 builder_(new (zone) RegExpBuilder(zone, flags)), 225 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)),
224 group_type_(group_type), 226 group_type_(group_type),
225 lookaround_type_(lookaround_type), 227 lookaround_type_(lookaround_type),
226 disjunction_capture_index_(disjunction_capture_index) {} 228 disjunction_capture_index_(disjunction_capture_index) {}
227 // Parser state of containing expression, if any. 229 // Parser state of containing expression, if any.
228 RegExpParserState* previous_state() { return previous_state_; } 230 RegExpParserState* previous_state() { return previous_state_; }
229 bool IsSubexpression() { return previous_state_ != NULL; } 231 bool IsSubexpression() { return previous_state_ != NULL; }
230 // RegExpBuilder building this regexp's AST. 232 // RegExpBuilder building this regexp's AST.
231 RegExpBuilder* builder() { return builder_; } 233 RegExpBuilder* builder() { return builder_; }
232 // Type of regexp being parsed (parenthesized group or entire regexp). 234 // Type of regexp being parsed (parenthesized group or entire regexp).
233 SubexpressionType group_type() { return group_type_; } 235 SubexpressionType group_type() { return group_type_; }
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
268 uc32 ReadNext(); 270 uc32 ReadNext();
269 FlatStringReader* in() { return in_; } 271 FlatStringReader* in() { return in_; }
270 void ScanForCaptures(); 272 void ScanForCaptures();
271 273
272 Isolate* isolate_; 274 Isolate* isolate_;
273 Zone* zone_; 275 Zone* zone_;
274 Handle<String>* error_; 276 Handle<String>* error_;
275 ZoneList<RegExpCapture*>* captures_; 277 ZoneList<RegExpCapture*>* captures_;
276 FlatStringReader* in_; 278 FlatStringReader* in_;
277 uc32 current_; 279 uc32 current_;
278 JSRegExp::Flags flags_; 280 bool ignore_case_;
281 bool multiline_;
282 bool unicode_;
279 int next_pos_; 283 int next_pos_;
280 int captures_started_; 284 int captures_started_;
281 // The capture count is only valid after we have scanned for captures. 285 // The capture count is only valid after we have scanned for captures.
282 int capture_count_; 286 int capture_count_;
283 bool has_more_; 287 bool has_more_;
284 bool simple_; 288 bool simple_;
285 bool contains_anchor_; 289 bool contains_anchor_;
286 bool is_scanned_for_captures_; 290 bool is_scanned_for_captures_;
287 bool failed_; 291 bool failed_;
288 }; 292 };
289 293
290 } // namespace internal 294 } // namespace internal
291 } // namespace v8 295 } // namespace v8
292 296
293 #endif // V8_REGEXP_REGEXP_PARSER_H_ 297 #endif // V8_REGEXP_REGEXP_PARSER_H_
OLD | NEW
« no previous file with comments | « no previous file | src/regexp/regexp-parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698