src/regexp/regexp-parser.h - Issue 1651073002: [regexp] do not store flags as bitfield in the parser.

Side by Side Diff: src/regexp/regexp-parser.h

Issue 1651073002: [regexp] do not store flags as bitfield in the parser. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef V8_REGEXP_REGEXP_PARSER_H_	5 #ifndef V8_REGEXP_REGEXP_PARSER_H_

6 #define V8_REGEXP_REGEXP_PARSER_H_	6 #define V8_REGEXP_REGEXP_PARSER_H_

7	7

8 #include "src/objects.h"	8 #include "src/objects.h"

9 #include "src/regexp/regexp-ast.h"	9 #include "src/regexp/regexp-ast.h"

10 #include "src/zone.h"	10 #include "src/zone.h"

(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
92	92

93 private:	93 private:

94 ZoneList<T> list_;	94 ZoneList<T> list_;

95 T* last_;	95 T* last_;

96 };	96 };

97	97

98	98

99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.	99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.

100 class RegExpBuilder : public ZoneObject {	100 class RegExpBuilder : public ZoneObject {

101 public:	101 public:

102 RegExpBuilder(Zone* zone, JSRegExp::Flags flags);	102 RegExpBuilder(Zone* zone, bool ignore_case, bool unicode);

103 void AddCharacter(uc16 character);	103 void AddCharacter(uc16 character);

104 void AddUnicodeCharacter(uc32 character);	104 void AddUnicodeCharacter(uc32 character);

105 // "Adds" an empty expression. Does nothing except consume a	105 // "Adds" an empty expression. Does nothing except consume a

106 // following quantifier	106 // following quantifier

107 void AddEmpty();	107 void AddEmpty();

108 void AddCharacterClass(RegExpCharacterClass* cc);	108 void AddCharacterClass(RegExpCharacterClass* cc);

109 void AddCharacterClassForDesugaring(uc32 c);	109 void AddCharacterClassForDesugaring(uc32 c);

110 void AddAtom(RegExpTree* tree);	110 void AddAtom(RegExpTree* tree);

111 void AddTerm(RegExpTree* tree);	111 void AddTerm(RegExpTree* tree);

112 void AddAssertion(RegExpTree* tree);	112 void AddAssertion(RegExpTree* tree);

113 void NewAlternative(); // '|'	113 void NewAlternative(); // '|'

114 bool AddQuantifierToAtom(int min, int max,	114 bool AddQuantifierToAtom(int min, int max,

115 RegExpQuantifier::QuantifierType type);	115 RegExpQuantifier::QuantifierType type);

116 RegExpTree* ToRegExp();	116 RegExpTree* ToRegExp();

117	117

118 private:	118 private:

119 static const uc16 kNoPendingSurrogate = 0;	119 static const uc16 kNoPendingSurrogate = 0;

120 void AddLeadSurrogate(uc16 lead_surrogate);	120 void AddLeadSurrogate(uc16 lead_surrogate);

121 void AddTrailSurrogate(uc16 trail_surrogate);	121 void AddTrailSurrogate(uc16 trail_surrogate);

122 void FlushPendingSurrogate();	122 void FlushPendingSurrogate();

123 void FlushCharacters();	123 void FlushCharacters();

124 void FlushText();	124 void FlushText();

125 void FlushTerms();	125 void FlushTerms();

126 bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);	126 bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);

127 bool NeedsDesugaringForIgnoreCase(uc32 c);	127 bool NeedsDesugaringForIgnoreCase(uc32 c);

128 Zone* zone() const { return zone_; }	128 Zone* zone() const { return zone_; }

129 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }	129 bool ignore_case() const { return ignore_case_; }

130 bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }	130 bool unicode() const { return unicode_; }

131	131

132 Zone* zone_;	132 Zone* zone_;

133 bool pending_empty_;	133 bool pending_empty_;

134 JSRegExp::Flags flags_;	134 bool ignore_case_;

	135 bool unicode_;

135 ZoneList<uc16>* characters_;	136 ZoneList<uc16>* characters_;

136 uc16 pending_surrogate_;	137 uc16 pending_surrogate_;

137 BufferedZoneList<RegExpTree, 2> terms_;	138 BufferedZoneList<RegExpTree, 2> terms_;

138 BufferedZoneList<RegExpTree, 2> text_;	139 BufferedZoneList<RegExpTree, 2> text_;

139 BufferedZoneList<RegExpTree, 2> alternatives_;	140 BufferedZoneList<RegExpTree, 2> alternatives_;

140 #ifdef DEBUG	141 #ifdef DEBUG

141 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_;	142 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_;

142 #define LAST(x) last_added_ = x;	143 #define LAST(x) last_added_ = x;

143 #else	144 #else

144 #define LAST(x)	145 #define LAST(x)

(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
188 void Reset(int pos);	189 void Reset(int pos);

189	190

190 // Reports whether the pattern might be used as a literal search string.	191 // Reports whether the pattern might be used as a literal search string.

191 // Only use if the result of the parse is a single atom node.	192 // Only use if the result of the parse is a single atom node.

192 bool simple();	193 bool simple();

193 bool contains_anchor() { return contains_anchor_; }	194 bool contains_anchor() { return contains_anchor_; }

194 void set_contains_anchor() { contains_anchor_ = true; }	195 void set_contains_anchor() { contains_anchor_ = true; }

195 int captures_started() { return captures_started_; }	196 int captures_started() { return captures_started_; }

196 int position() { return next_pos_ - 1; }	197 int position() { return next_pos_ - 1; }

197 bool failed() { return failed_; }	198 bool failed() { return failed_; }

198 bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }	199 bool ignore_case() const { return ignore_case_; }

199 bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; }	200 bool multiline() const { return multiline_; }

	201 bool unicode() const { return unicode_; }

200	202

201 static bool IsSyntaxCharacterOrSlash(uc32 c);	203 static bool IsSyntaxCharacterOrSlash(uc32 c);

202	204

203 static const int kMaxCaptures = 1 << 16;	205 static const int kMaxCaptures = 1 << 16;

204 static const uc32 kEndMarker = (1 << 21);	206 static const uc32 kEndMarker = (1 << 21);

205	207

206 private:	208 private:

207 enum SubexpressionType {	209 enum SubexpressionType {

208 INITIAL,	210 INITIAL,

209 CAPTURE, // All positive values represent captures.	211 CAPTURE, // All positive values represent captures.

210 POSITIVE_LOOKAROUND,	212 POSITIVE_LOOKAROUND,

211 NEGATIVE_LOOKAROUND,	213 NEGATIVE_LOOKAROUND,

212 GROUPING	214 GROUPING

213 };	215 };

214	216

215 class RegExpParserState : public ZoneObject {	217 class RegExpParserState : public ZoneObject {

216 public:	218 public:

217 RegExpParserState(RegExpParserState* previous_state,	219 RegExpParserState(RegExpParserState* previous_state,

218 SubexpressionType group_type,	220 SubexpressionType group_type,

219 RegExpLookaround::Type lookaround_type,	221 RegExpLookaround::Type lookaround_type,

220 int disjunction_capture_index, JSRegExp::Flags flags,	222 int disjunction_capture_index, bool ignore_case,

221 Zone* zone)	223 bool unicode, Zone* zone)

222 : previous_state_(previous_state),	224 : previous_state_(previous_state),

223 builder_(new (zone) RegExpBuilder(zone, flags)),	225 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)),

224 group_type_(group_type),	226 group_type_(group_type),

225 lookaround_type_(lookaround_type),	227 lookaround_type_(lookaround_type),

226 disjunction_capture_index_(disjunction_capture_index) {}	228 disjunction_capture_index_(disjunction_capture_index) {}

227 // Parser state of containing expression, if any.	229 // Parser state of containing expression, if any.

228 RegExpParserState* previous_state() { return previous_state_; }	230 RegExpParserState* previous_state() { return previous_state_; }

229 bool IsSubexpression() { return previous_state_ != NULL; }	231 bool IsSubexpression() { return previous_state_ != NULL; }

230 // RegExpBuilder building this regexp's AST.	232 // RegExpBuilder building this regexp's AST.

231 RegExpBuilder* builder() { return builder_; }	233 RegExpBuilder* builder() { return builder_; }

232 // Type of regexp being parsed (parenthesized group or entire regexp).	234 // Type of regexp being parsed (parenthesized group or entire regexp).

233 SubexpressionType group_type() { return group_type_; }	235 SubexpressionType group_type() { return group_type_; }

(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
268 uc32 ReadNext();	270 uc32 ReadNext();

269 FlatStringReader* in() { return in_; }	271 FlatStringReader* in() { return in_; }

270 void ScanForCaptures();	272 void ScanForCaptures();

271	273

272 Isolate* isolate_;	274 Isolate* isolate_;

273 Zone* zone_;	275 Zone* zone_;

274 Handle<String>* error_;	276 Handle<String>* error_;

275 ZoneList<RegExpCapture> captures_;	277 ZoneList<RegExpCapture> captures_;

276 FlatStringReader* in_;	278 FlatStringReader* in_;

277 uc32 current_;	279 uc32 current_;

278 JSRegExp::Flags flags_;	280 bool ignore_case_;

	281 bool multiline_;

	282 bool unicode_;

279 int next_pos_;	283 int next_pos_;

280 int captures_started_;	284 int captures_started_;

281 // The capture count is only valid after we have scanned for captures.	285 // The capture count is only valid after we have scanned for captures.

282 int capture_count_;	286 int capture_count_;

283 bool has_more_;	287 bool has_more_;

284 bool simple_;	288 bool simple_;

285 bool contains_anchor_;	289 bool contains_anchor_;

286 bool is_scanned_for_captures_;	290 bool is_scanned_for_captures_;

287 bool failed_;	291 bool failed_;

288 };	292 };

289	293

290 } // namespace internal	294 } // namespace internal

291 } // namespace v8	295 } // namespace v8

292	296

293 #endif // V8_REGEXP_REGEXP_PARSER_H_	297 #endif // V8_REGEXP_REGEXP_PARSER_H_

OLD	NEW

« no previous file with comments | « no previous file | src/regexp/regexp-parser.cc » ('j') | no next file with comments »