OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
11 #include "src/regexp/jsregexp.h" | 11 #include "src/regexp/jsregexp.h" |
12 #include "src/utils.h" | 12 #include "src/utils.h" |
13 | 13 |
14 #ifdef V8_I18N_SUPPORT | 14 #ifdef V8_I18N_SUPPORT |
15 #include "unicode/uset.h" | 15 #include "unicode/uset.h" |
16 #endif // V8_I18N_SUPPORT | 16 #endif // V8_I18N_SUPPORT |
17 | 17 |
18 namespace v8 { | 18 namespace v8 { |
19 namespace internal { | 19 namespace internal { |
20 | 20 |
21 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 21 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
22 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) | 22 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) |
23 : isolate_(isolate), | 23 : isolate_(isolate), |
24 zone_(zone), | 24 zone_(zone), |
25 error_(error), | 25 error_(error), |
26 captures_(NULL), | 26 captures_(NULL), |
27 in_(in), | 27 in_(in), |
28 current_(kEndMarker), | 28 current_(kEndMarker), |
29 flags_(flags), | 29 ignore_case_(flags & JSRegExp::kIgnoreCase), |
| 30 multiline_(flags & JSRegExp::kMultiline), |
| 31 unicode_(flags & JSRegExp::kUnicode), |
30 next_pos_(0), | 32 next_pos_(0), |
31 captures_started_(0), | 33 captures_started_(0), |
32 capture_count_(0), | 34 capture_count_(0), |
33 has_more_(true), | 35 has_more_(true), |
34 simple_(false), | 36 simple_(false), |
35 contains_anchor_(false), | 37 contains_anchor_(false), |
36 is_scanned_for_captures_(false), | 38 is_scanned_for_captures_(false), |
37 failed_(false) { | 39 failed_(false) { |
38 Advance(); | 40 Advance(); |
39 } | 41 } |
40 | 42 |
41 | |
42 template <bool update_position> | 43 template <bool update_position> |
43 uc32 RegExpParser::ReadNext() { | 44 inline uc32 RegExpParser::ReadNext() { |
44 int position = next_pos_; | 45 int position = next_pos_; |
45 uc32 c0 = in()->Get(position); | 46 uc32 c0 = in()->Get(position); |
46 position++; | 47 position++; |
47 // Read the whole surrogate pair in case of unicode flag, if possible. | 48 // Read the whole surrogate pair in case of unicode flag, if possible. |
48 if (unicode() && position < in()->length() && | 49 if (unicode() && position < in()->length() && |
49 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) { | 50 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) { |
50 uc16 c1 = in()->Get(position); | 51 uc16 c1 = in()->Get(position); |
51 if (unibrow::Utf16::IsTrailSurrogate(c1)) { | 52 if (unibrow::Utf16::IsTrailSurrogate(c1)) { |
52 c0 = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(c0), c1); | 53 c0 = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(c0), c1); |
53 position++; | 54 position++; |
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
162 // Alternative :: | 163 // Alternative :: |
163 // [empty] | 164 // [empty] |
164 // Term Alternative | 165 // Term Alternative |
165 // Term :: | 166 // Term :: |
166 // Assertion | 167 // Assertion |
167 // Atom | 168 // Atom |
168 // Atom Quantifier | 169 // Atom Quantifier |
169 RegExpTree* RegExpParser::ParseDisjunction() { | 170 RegExpTree* RegExpParser::ParseDisjunction() { |
170 // Used to store current state while parsing subexpressions. | 171 // Used to store current state while parsing subexpressions. |
171 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, | 172 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, |
172 flags_, zone()); | 173 ignore_case(), unicode(), zone()); |
173 RegExpParserState* state = &initial_state; | 174 RegExpParserState* state = &initial_state; |
174 // Cache the builder in a local variable for quick access. | 175 // Cache the builder in a local variable for quick access. |
175 RegExpBuilder* builder = initial_state.builder(); | 176 RegExpBuilder* builder = initial_state.builder(); |
176 while (true) { | 177 while (true) { |
177 switch (current()) { | 178 switch (current()) { |
178 case kEndMarker: | 179 case kEndMarker: |
179 if (state->IsSubexpression()) { | 180 if (state->IsSubexpression()) { |
180 // Inside a parenthesized group when hitting end of input. | 181 // Inside a parenthesized group when hitting end of input. |
181 return ReportError(CStrVector("Unterminated group")); | 182 return ReportError(CStrVector("Unterminated group")); |
182 } | 183 } |
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
296 return ReportError(CStrVector("Invalid group")); | 297 return ReportError(CStrVector("Invalid group")); |
297 } | 298 } |
298 Advance(2); | 299 Advance(2); |
299 } else { | 300 } else { |
300 if (captures_started_ >= kMaxCaptures) { | 301 if (captures_started_ >= kMaxCaptures) { |
301 return ReportError(CStrVector("Too many captures")); | 302 return ReportError(CStrVector("Too many captures")); |
302 } | 303 } |
303 captures_started_++; | 304 captures_started_++; |
304 } | 305 } |
305 // Store current state and begin new disjunction parsing. | 306 // Store current state and begin new disjunction parsing. |
306 state = | 307 state = new (zone()) RegExpParserState( |
307 new (zone()) RegExpParserState(state, subexpr_type, lookaround_type, | 308 state, subexpr_type, lookaround_type, captures_started_, |
308 captures_started_, flags_, zone()); | 309 ignore_case(), unicode(), zone()); |
309 builder = state->builder(); | 310 builder = state->builder(); |
310 continue; | 311 continue; |
311 } | 312 } |
312 case '[': { | 313 case '[': { |
313 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); | 314 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); |
314 builder->AddCharacterClass(cc->AsCharacterClass()); | 315 builder->AddCharacterClass(cc->AsCharacterClass()); |
315 break; | 316 break; |
316 } | 317 } |
317 // Atom :: | 318 // Atom :: |
318 // \ AtomEscape | 319 // \ AtomEscape |
(...skipping 754 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1073 } | 1074 } |
1074 result->tree = tree; | 1075 result->tree = tree; |
1075 int capture_count = parser.captures_started(); | 1076 int capture_count = parser.captures_started(); |
1076 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; | 1077 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; |
1077 result->contains_anchor = parser.contains_anchor(); | 1078 result->contains_anchor = parser.contains_anchor(); |
1078 result->capture_count = capture_count; | 1079 result->capture_count = capture_count; |
1079 } | 1080 } |
1080 return !parser.failed(); | 1081 return !parser.failed(); |
1081 } | 1082 } |
1082 | 1083 |
1083 | 1084 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) |
1084 RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags) | |
1085 : zone_(zone), | 1085 : zone_(zone), |
1086 pending_empty_(false), | 1086 pending_empty_(false), |
1087 flags_(flags), | 1087 ignore_case_(ignore_case), |
| 1088 unicode_(unicode), |
1088 characters_(NULL), | 1089 characters_(NULL), |
1089 pending_surrogate_(kNoPendingSurrogate), | 1090 pending_surrogate_(kNoPendingSurrogate), |
1090 terms_(), | 1091 terms_(), |
1091 alternatives_() | 1092 alternatives_() |
1092 #ifdef DEBUG | 1093 #ifdef DEBUG |
1093 , | 1094 , |
1094 last_added_(ADD_NONE) | 1095 last_added_(ADD_NONE) |
1095 #endif | 1096 #endif |
1096 { | 1097 { |
1097 } | 1098 } |
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1361 return false; | 1362 return false; |
1362 } | 1363 } |
1363 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1364 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1364 zone()); | 1365 zone()); |
1365 LAST(ADD_TERM); | 1366 LAST(ADD_TERM); |
1366 return true; | 1367 return true; |
1367 } | 1368 } |
1368 | 1369 |
1369 } // namespace internal | 1370 } // namespace internal |
1370 } // namespace v8 | 1371 } // namespace v8 |
OLD | NEW |