Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(349)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1651073002: [regexp] do not store flags as bitfield in the parser. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
11 #include "src/regexp/jsregexp.h" 11 #include "src/regexp/jsregexp.h"
12 #include "src/utils.h" 12 #include "src/utils.h"
13 13
14 #ifdef V8_I18N_SUPPORT 14 #ifdef V8_I18N_SUPPORT
15 #include "unicode/uset.h" 15 #include "unicode/uset.h"
16 #endif // V8_I18N_SUPPORT 16 #endif // V8_I18N_SUPPORT
17 17
18 namespace v8 { 18 namespace v8 {
19 namespace internal { 19 namespace internal {
20 20
21 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, 21 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
22 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) 22 JSRegExp::Flags flags, Isolate* isolate, Zone* zone)
23 : isolate_(isolate), 23 : isolate_(isolate),
24 zone_(zone), 24 zone_(zone),
25 error_(error), 25 error_(error),
26 captures_(NULL), 26 captures_(NULL),
27 in_(in), 27 in_(in),
28 current_(kEndMarker), 28 current_(kEndMarker),
29 flags_(flags), 29 ignore_case_(flags & JSRegExp::kIgnoreCase),
30 multiline_(flags & JSRegExp::kMultiline),
31 unicode_(flags & JSRegExp::kUnicode),
30 next_pos_(0), 32 next_pos_(0),
31 captures_started_(0), 33 captures_started_(0),
32 capture_count_(0), 34 capture_count_(0),
33 has_more_(true), 35 has_more_(true),
34 simple_(false), 36 simple_(false),
35 contains_anchor_(false), 37 contains_anchor_(false),
36 is_scanned_for_captures_(false), 38 is_scanned_for_captures_(false),
37 failed_(false) { 39 failed_(false) {
38 Advance(); 40 Advance();
39 } 41 }
40 42
41
42 template <bool update_position> 43 template <bool update_position>
43 uc32 RegExpParser::ReadNext() { 44 inline uc32 RegExpParser::ReadNext() {
44 int position = next_pos_; 45 int position = next_pos_;
45 uc32 c0 = in()->Get(position); 46 uc32 c0 = in()->Get(position);
46 position++; 47 position++;
47 // Read the whole surrogate pair in case of unicode flag, if possible. 48 // Read the whole surrogate pair in case of unicode flag, if possible.
48 if (unicode() && position < in()->length() && 49 if (unicode() && position < in()->length() &&
49 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) { 50 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) {
50 uc16 c1 = in()->Get(position); 51 uc16 c1 = in()->Get(position);
51 if (unibrow::Utf16::IsTrailSurrogate(c1)) { 52 if (unibrow::Utf16::IsTrailSurrogate(c1)) {
52 c0 = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(c0), c1); 53 c0 = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(c0), c1);
53 position++; 54 position++;
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
162 // Alternative :: 163 // Alternative ::
163 // [empty] 164 // [empty]
164 // Term Alternative 165 // Term Alternative
165 // Term :: 166 // Term ::
166 // Assertion 167 // Assertion
167 // Atom 168 // Atom
168 // Atom Quantifier 169 // Atom Quantifier
169 RegExpTree* RegExpParser::ParseDisjunction() { 170 RegExpTree* RegExpParser::ParseDisjunction() {
170 // Used to store current state while parsing subexpressions. 171 // Used to store current state while parsing subexpressions.
171 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, 172 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,
172 flags_, zone()); 173 ignore_case(), unicode(), zone());
173 RegExpParserState* state = &initial_state; 174 RegExpParserState* state = &initial_state;
174 // Cache the builder in a local variable for quick access. 175 // Cache the builder in a local variable for quick access.
175 RegExpBuilder* builder = initial_state.builder(); 176 RegExpBuilder* builder = initial_state.builder();
176 while (true) { 177 while (true) {
177 switch (current()) { 178 switch (current()) {
178 case kEndMarker: 179 case kEndMarker:
179 if (state->IsSubexpression()) { 180 if (state->IsSubexpression()) {
180 // Inside a parenthesized group when hitting end of input. 181 // Inside a parenthesized group when hitting end of input.
181 return ReportError(CStrVector("Unterminated group")); 182 return ReportError(CStrVector("Unterminated group"));
182 } 183 }
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
296 return ReportError(CStrVector("Invalid group")); 297 return ReportError(CStrVector("Invalid group"));
297 } 298 }
298 Advance(2); 299 Advance(2);
299 } else { 300 } else {
300 if (captures_started_ >= kMaxCaptures) { 301 if (captures_started_ >= kMaxCaptures) {
301 return ReportError(CStrVector("Too many captures")); 302 return ReportError(CStrVector("Too many captures"));
302 } 303 }
303 captures_started_++; 304 captures_started_++;
304 } 305 }
305 // Store current state and begin new disjunction parsing. 306 // Store current state and begin new disjunction parsing.
306 state = 307 state = new (zone()) RegExpParserState(
307 new (zone()) RegExpParserState(state, subexpr_type, lookaround_type, 308 state, subexpr_type, lookaround_type, captures_started_,
308 captures_started_, flags_, zone()); 309 ignore_case(), unicode(), zone());
309 builder = state->builder(); 310 builder = state->builder();
310 continue; 311 continue;
311 } 312 }
312 case '[': { 313 case '[': {
313 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); 314 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED);
314 builder->AddCharacterClass(cc->AsCharacterClass()); 315 builder->AddCharacterClass(cc->AsCharacterClass());
315 break; 316 break;
316 } 317 }
317 // Atom :: 318 // Atom ::
318 // \ AtomEscape 319 // \ AtomEscape
(...skipping 754 matching lines...) Expand 10 before | Expand all | Expand 10 after
1073 } 1074 }
1074 result->tree = tree; 1075 result->tree = tree;
1075 int capture_count = parser.captures_started(); 1076 int capture_count = parser.captures_started();
1076 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; 1077 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
1077 result->contains_anchor = parser.contains_anchor(); 1078 result->contains_anchor = parser.contains_anchor();
1078 result->capture_count = capture_count; 1079 result->capture_count = capture_count;
1079 } 1080 }
1080 return !parser.failed(); 1081 return !parser.failed();
1081 } 1082 }
1082 1083
1083 1084 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode)
1084 RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags)
1085 : zone_(zone), 1085 : zone_(zone),
1086 pending_empty_(false), 1086 pending_empty_(false),
1087 flags_(flags), 1087 ignore_case_(ignore_case),
1088 unicode_(unicode),
1088 characters_(NULL), 1089 characters_(NULL),
1089 pending_surrogate_(kNoPendingSurrogate), 1090 pending_surrogate_(kNoPendingSurrogate),
1090 terms_(), 1091 terms_(),
1091 alternatives_() 1092 alternatives_()
1092 #ifdef DEBUG 1093 #ifdef DEBUG
1093 , 1094 ,
1094 last_added_(ADD_NONE) 1095 last_added_(ADD_NONE)
1095 #endif 1096 #endif
1096 { 1097 {
1097 } 1098 }
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after
1361 return false; 1362 return false;
1362 } 1363 }
1363 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1364 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1364 zone()); 1365 zone());
1365 LAST(ADD_TERM); 1366 LAST(ADD_TERM);
1366 return true; 1367 return true;
1367 } 1368 }
1368 1369
1369 } // namespace internal 1370 } // namespace internal
1370 } // namespace v8 1371 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698